def test_basic(self):
    # One Meta record followed by one Job record, each on its own line.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'Meta VERSION="1" .\n',
        'Job JOBID="job_201601081945_0005" JOB_PRIORITY="NORMAL" .\n',
    ]

    expected = [
        dict(
            fields=dict(VERSION='1'),
            num_lines=1,
            start_line=0,
            type='Meta',
        ),
        dict(
            fields=dict(
                JOBID='job_201601081945_0005',
                JOB_PRIORITY='NORMAL',
            ),
            num_lines=1,
            start_line=1,
            type='Job',
        ),
    ]

    self.assertEqual(list(_parse_pre_yarn_history_file(lines)), expected)
def test_task_counters(self):
    """Counters from multiple Task records should be summed together."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'Task TASKID="task_201601081945_0005_m_000005" TASK_TYPE="SETUP"'
        ' TASK_STATUS="SUCCESS" FINISH_TIME="1452283612363"'
        ' COUNTERS="{(FileSystemCounters)(FileSystemCounters)'
        '[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(27785)]}" .\n',
        'Task TASKID="task_201601081945_0005_m_000000" TASK_TYPE="MAP"'
        ' TASK_STATUS="SUCCESS" FINISH_TIME="1452283651437"'
        ' COUNTERS="{'
        # raw string: \. is the history format's escaped dot; in a plain
        # literal it is an invalid escape sequence (SyntaxWarning, and a
        # future SyntaxError). Runtime value is byte-identical.
        r"(org\.apache\.hadoop\.mapred\.FileOutputFormat$Counter)"
        "(File Output Format Counters )"
        "[(BYTES_WRITTEN)(Bytes Written)(0)]}"
        "{(FileSystemCounters)(FileSystemCounters)"
        "[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(27785)]"
        '[(HDFS_BYTES_READ)(HDFS_BYTES_READ)(248)]}" .\n',
    ]
    self.assertEqual(
        _summarize_pre_yarn_history(_parse_pre_yarn_history_file(lines)),
        dict(
            counters={
                # FILE_BYTES_WRITTEN: 27785 from each of the two tasks
                "FileSystemCounters": {
                    "FILE_BYTES_WRITTEN": 55570,
                    "HDFS_BYTES_READ": 248,
                },
                "File Output Format Counters ": {"Bytes Written": 0},
            },
            errors=[],
        ),
    )
def test_task_counters(self):
    """Counters from several Task records are summed into one total."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'Task TASKID="task_201601081945_0005_m_000005" TASK_TYPE="SETUP"'
        ' TASK_STATUS="SUCCESS" FINISH_TIME="1452283612363"'
        ' COUNTERS="{(FileSystemCounters)(FileSystemCounters)'
        '[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(27785)]}" .\n',
        'Task TASKID="task_201601081945_0005_m_000000" TASK_TYPE="MAP"'
        ' TASK_STATUS="SUCCESS" FINISH_TIME="1452283651437"'
        ' COUNTERS="{'
        # raw string: \. is the history format's escaped dot; in a plain
        # literal it is an invalid escape sequence (SyntaxWarning).
        # Runtime value is byte-identical to the non-raw original.
        r'(org\.apache\.hadoop\.mapred\.FileOutputFormat$Counter)'
        '(File Output Format Counters )'
        '[(BYTES_WRITTEN)(Bytes Written)(0)]}'
        '{(FileSystemCounters)(FileSystemCounters)'
        '[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(27785)]'
        '[(HDFS_BYTES_READ)(HDFS_BYTES_READ)(248)]}" .\n',
    ]
    self.assertEqual(
        _summarize_pre_yarn_history(_parse_pre_yarn_history_file(lines)),
        dict(
            counters={
                # FILE_BYTES_WRITTEN: 27785 from each of the two tasks
                'FileSystemCounters': {
                    'FILE_BYTES_WRITTEN': 55570,
                    'HDFS_BYTES_READ': 248,
                },
                'File Output Format Counters ': {
                    'Bytes Written': 0,
                },
            },
            errors=[]))
def test_job_counters(self):
    """Counters attached to a Job record should appear in the summary."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'Job JOBID="job_201106092314_0003" FINISH_TIME="1307662284564"'
        ' JOB_STATUS="SUCCESS" FINISHED_MAPS="2" FINISHED_REDUCES="1"'
        ' FAILED_MAPS="0" FAILED_REDUCES="0" COUNTERS="'
        # raw string: \. is the history format's escaped dot; in a plain
        # literal it is an invalid escape sequence (SyntaxWarning).
        # Runtime value is byte-identical.
        r"{(org\.apache\.hadoop\.mapred\.JobInProgress$Counter)"
        "(Job Counters )"
        '[(TOTAL_LAUNCHED_REDUCES)(Launched reduce tasks)(1)]}" .\n'
    ]
    self.assertEqual(
        _summarize_pre_yarn_history(_parse_pre_yarn_history_file(lines)),
        dict(counters={"Job Counters ": {"Launched reduce tasks": 1}},
             errors=[]),
    )
def test_basic(self):
    # Parse a minimal two-record history: a Meta line and a Job line.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    meta_line = 'Meta VERSION="1" .\n'
    job_line = 'Job JOBID="job_201601081945_0005" JOB_PRIORITY="NORMAL" .\n'

    parsed = list(_parse_pre_yarn_history_file([meta_line, job_line]))

    self.assertEqual(parsed, [
        dict(fields=dict(VERSION="1"), num_lines=1, start_line=0,
             type="Meta"),
        dict(fields=dict(JOBID="job_201601081945_0005",
                         JOB_PRIORITY="NORMAL"),
             num_lines=1, start_line=1, type="Job"),
    ])
def test_job_counters(self):
    """A Job record's COUNTERS field should be summarized."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'Job JOBID="job_201106092314_0003" FINISH_TIME="1307662284564"'
        ' JOB_STATUS="SUCCESS" FINISHED_MAPS="2" FINISHED_REDUCES="1"'
        ' FAILED_MAPS="0" FAILED_REDUCES="0" COUNTERS="'
        # raw string: \. is the history format's escaped dot; in a plain
        # literal it is an invalid escape sequence (SyntaxWarning).
        # Runtime value is byte-identical.
        r'{(org\.apache\.hadoop\.mapred\.JobInProgress$Counter)'
        '(Job Counters )'
        '[(TOTAL_LAUNCHED_REDUCES)(Launched reduce tasks)(1)]}" .\n'
    ]
    self.assertEqual(
        _summarize_pre_yarn_history(_parse_pre_yarn_history_file(lines)),
        dict(counters={'Job Counters ': {'Launched reduce tasks': 1}},
             errors=[]))
def test_bad_records(self):
    # Malformed records (blank line, unknown type, unterminated quote)
    # are silently skipped; only the well-formed Job record comes back.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    mixed_lines = [
        "\n",
        "Foo BAZ .\n",
        'Job JOBID="job_201601081945_0005" JOB_PRIORITY="NORMAL" .\n',
        'Job JOBID="\n',
    ]

    records = list(_parse_pre_yarn_history_file(mixed_lines))

    self.assertEqual(records, [
        dict(
            fields=dict(JOBID="job_201601081945_0005",
                        JOB_PRIORITY="NORMAL"),
            num_lines=1,
            start_line=2,
            type="Job",
        ),
    ])
def test_errors(self):
    """A FAILED MapAttempt's multi-line ERROR becomes an errors entry."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'MapAttempt TASK_TYPE="MAP"'
        ' TASKID="task_201601081945_0005_m_000001"'
        " TASK_ATTEMPT_ID="
        '"task_201601081945_0005_m_00000_2"'
        ' TASK_STATUS="FAILED"'
        # raw strings: \. is the history format's escaped dot; in plain
        # literals these are invalid escape sequences (SyntaxWarning).
        # Runtime values are byte-identical.
        r' ERROR="java\.lang\.RuntimeException:'
        r" PipeMapRed\.waitOutputThreads():"
        " subprocess failed with code 1\n",
        " at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed"
        "\\.waitOutputThreads(PipeMapRed\\.java:372)\n",
        " at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed"
        "\\.mapRedFinished(PipeMapRed\\.java:586)\n",
        '" .\n',
    ]
    path = "/history/history.jar"
    self.assertEqual(
        _summarize_pre_yarn_history(
            _parse_pre_yarn_history_file(lines), path=path),
        dict(
            counters={},
            errors=[
                dict(
                    java_error=dict(
                        # \. sequences are unescaped to plain dots
                        error=(
                            "java.lang.RuntimeException: PipeMapRed"
                            ".waitOutputThreads():"
                            " subprocess failed with code 1\n"
                            " at org.apache.hadoop.streaming"
                            ".PipeMapRed.waitOutputThreads"
                            "(PipeMapRed.java:372)\n"
                            " at org.apache.hadoop.streaming"
                            ".PipeMapRed.mapRedFinished"
                            "(PipeMapRed.java:586)\n"
                        ),
                        num_lines=4,
                        path=path,
                        start_line=0,
                    ),
                    task_attempt_id="task_201601081945_0005_m_00000_2",
                )
            ],
        ),
    )
def test_errors(self):
    """The ERROR field of a FAILED attempt is reported under errors."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'MapAttempt TASK_TYPE="MAP"'
        ' TASKID="task_201601081945_0005_m_000001"'
        ' TASK_ATTEMPT_ID='
        '"task_201601081945_0005_m_00000_2"'
        ' TASK_STATUS="FAILED"'
        # raw strings: \. is the history format's escaped dot; in plain
        # literals these are invalid escape sequences (SyntaxWarning).
        # Runtime values are byte-identical.
        r' ERROR="java\.lang\.RuntimeException:'
        r' PipeMapRed\.waitOutputThreads():'
        ' subprocess failed with code 1\n',
        ' at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed'
        '\\.waitOutputThreads(PipeMapRed\\.java:372)\n',
        ' at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed'
        '\\.mapRedFinished(PipeMapRed\\.java:586)\n',
        '" .\n',
    ]
    path = '/history/history.jar'
    self.assertEqual(
        _summarize_pre_yarn_history(
            _parse_pre_yarn_history_file(lines), path=path),
        dict(
            counters={},
            errors=[
                dict(
                    java_error=dict(
                        # \. sequences are unescaped to plain dots
                        error=(
                            'java.lang.RuntimeException: PipeMapRed'
                            '.waitOutputThreads():'
                            ' subprocess failed with code 1\n'
                            ' at org.apache.hadoop.streaming'
                            '.PipeMapRed.waitOutputThreads'
                            '(PipeMapRed.java:372)\n'
                            ' at org.apache.hadoop.streaming'
                            '.PipeMapRed.mapRedFinished'
                            '(PipeMapRed.java:586)\n'),
                        num_lines=4,
                        path=path,
                        start_line=0,
                    ),
                    task_attempt_id='task_201601081945_0005_m_00000_2',
                ),
            ]))
def test_bad_records(self):
    # Bad records should be silently dropped; good records still parse.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    good_job = 'Job JOBID="job_201601081945_0005" JOB_PRIORITY="NORMAL" .\n'
    lines = ['\n', 'Foo BAZ .\n', good_job, 'Job JOBID="\n']

    expected_record = dict(
        fields=dict(
            JOBID='job_201601081945_0005',
            JOB_PRIORITY='NORMAL',
        ),
        num_lines=1,
        start_line=2,  # the one good record is the third input line
        type='Job',
    )

    self.assertEqual(
        list(_parse_pre_yarn_history_file(lines)), [expected_record])
def test_unescape(self):
    # Escaped dots (\.) in field values are unescaped to plain dots.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    task_line = (
        'Task TASKID="task_201512311928_0001_m_000003" TASK_TYPE="MAP"'
        ' START_TIME="1451590341378"'
        ' SPLITS="/default-rack/172\\.31\\.22\\.226" .\n'
    )

    records = list(_parse_pre_yarn_history_file([task_line]))

    self.assertEqual(records, [
        dict(
            fields=dict(
                TASKID='task_201512311928_0001_m_000003',
                TASK_TYPE='MAP',
                START_TIME='1451590341378',
                SPLITS='/default-rack/172.31.22.226',
            ),
            num_lines=1,
            start_line=0,
            type='Task',
        ),
    ])
def test_unescape(self):
    # SPLITS contains \.-escaped dots; the parser should unescape them.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'Task TASKID="task_201512311928_0001_m_000003" TASK_TYPE="MAP"'
        ' START_TIME="1451590341378"'
        ' SPLITS="/default-rack/172\\.31\\.22\\.226" .\n'
    ]

    expected_fields = dict(
        TASKID="task_201512311928_0001_m_000003",
        TASK_TYPE="MAP",
        START_TIME="1451590341378",
        SPLITS="/default-rack/172.31.22.226",
    )

    self.assertEqual(
        list(_parse_pre_yarn_history_file(lines)),
        [dict(fields=expected_fields, num_lines=1, start_line=0,
              type="Task")],
    )
def test_multiline(self):
    """A record whose ERROR value spans several lines parses as one record."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'MapAttempt TASK_TYPE="MAP"'
        ' TASKID="task_201601081945_0005_m_000001"'
        ' TASK_STATUS="FAILED"'
        # raw strings: \. is the history format's escaped dot; in plain
        # literals these are invalid escape sequences (SyntaxWarning).
        # Runtime values are byte-identical.
        r' ERROR="java\.lang\.RuntimeException:'
        r" PipeMapRed\.waitOutputThreads():"
        " subprocess failed with code 1\n",
        " at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed"
        "\\.waitOutputThreads(PipeMapRed\\.java:372)\n",
        " at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed"
        "\\.mapRedFinished(PipeMapRed\\.java:586)\n",
        '" .\n',
    ]
    self.assertEqual(
        list(_parse_pre_yarn_history_file(lines)),
        [
            dict(
                fields=dict(
                    # \. sequences are unescaped to plain dots
                    ERROR=(
                        "java.lang.RuntimeException: PipeMapRed"
                        ".waitOutputThreads():"
                        " subprocess failed with code 1\n"
                        " at org.apache.hadoop.streaming.PipeMapRed"
                        ".waitOutputThreads(PipeMapRed.java:372)\n"
                        " at org.apache.hadoop.streaming.PipeMapRed"
                        ".mapRedFinished(PipeMapRed.java:586)\n"
                    ),
                    TASK_TYPE="MAP",
                    TASKID="task_201601081945_0005_m_000001",
                    TASK_STATUS="FAILED",
                ),
                num_lines=4,
                start_line=0,
                type="MapAttempt",
            )
        ],
    )
def test_multiline(self):
    """A multi-line ERROR field is gathered into a single 4-line record."""
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    lines = [
        'MapAttempt TASK_TYPE="MAP"'
        ' TASKID="task_201601081945_0005_m_000001"'
        ' TASK_STATUS="FAILED"'
        # raw strings: \. is the history format's escaped dot; in plain
        # literals these are invalid escape sequences (SyntaxWarning).
        # Runtime values are byte-identical.
        r' ERROR="java\.lang\.RuntimeException:'
        r' PipeMapRed\.waitOutputThreads():'
        ' subprocess failed with code 1\n',
        ' at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed'
        '\\.waitOutputThreads(PipeMapRed\\.java:372)\n',
        ' at org\\.apache\\.hadoop\\.streaming\\.PipeMapRed'
        '\\.mapRedFinished(PipeMapRed\\.java:586)\n',
        '" .\n',
    ]
    self.assertEqual(
        list(_parse_pre_yarn_history_file(lines)),
        [
            dict(
                fields=dict(
                    # \. sequences are unescaped to plain dots
                    ERROR=(
                        'java.lang.RuntimeException: PipeMapRed'
                        '.waitOutputThreads():'
                        ' subprocess failed with code 1\n'
                        ' at org.apache.hadoop.streaming.PipeMapRed'
                        '.waitOutputThreads(PipeMapRed.java:372)\n'
                        ' at org.apache.hadoop.streaming.PipeMapRed'
                        '.mapRedFinished(PipeMapRed.java:586)\n'),
                    TASK_TYPE='MAP',
                    TASKID='task_201601081945_0005_m_000001',
                    TASK_STATUS='FAILED',
                ),
                num_lines=4,
                start_line=0,
                type='MapAttempt',
            ),
        ])
def test_empty(self):
    # An empty history file yields no records at all.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    records = list(_parse_pre_yarn_history_file([]))
    self.assertEqual(records, [])
def test_empty(self):
    # Parsing zero input lines produces zero records.
    # NOTE(review): a test with this same name appears elsewhere in this
    # chunk; if both live in one class, Python keeps only the later
    # definition -- TODO confirm/rename.
    no_lines = []
    self.assertEqual(list(_parse_pre_yarn_history_file(no_lines)), [])