# Module-level imports used by the parsefile variants below. HadoopJobClass,
# analyze_job, parsenamevalue, the tasks module and the helper/print functions
# (print_fs_stats, print_tasks, get_totaltask_time, ...) are assumed to be
# defined or imported elsewhere in these scripts.
import sys
import time
import datetime
from subprocess import Popen, PIPE, STDOUT


def parsefile(jfile):
    lno = 0
    try:
        jf = open(jfile, 'r')
    except:
        print >> sys.stderr, "Unexpected error opening", jfile
        sys.exit(1)  # exit non-zero so the failure is visible to the caller
    job = HadoopJobClass()
    for line in jf:
        lno += 1
        # Each job-history record starts with its type (Job, Task, MapAttempt,
        # ReduceAttempt) followed by the rest of the record.
        tasktype = line.split(" ", 1)
        if tasktype[0] == "Job":
            analyze_job(job, tasktype[1])
        elif tasktype[0] == "MapAttempt" or tasktype[0] == "ReduceAttempt":
            result = parsenamevalue(tasktype[1])
            if result.has_key("TASK_ATTEMPT_ID"):
                idx = result["TASK_ATTEMPT_ID"]
                t = job.get_task(idx)
                if t is None:
                    t = tasks.taskclass()
                    t.set_taskid(idx)
                    job.add_task(t)
                tasks.analyze_task(tasktype[0], result, t)
        elif tasktype[0] == "Task":
            pass
        else:
            print >> sys.stderr, "Unknown Task", tasktype[0], " line number =", lno
    return job
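# A minimal usage sketch, not part of the original script: summarize_job is a
# hypothetical helper showing how the HadoopJobClass returned by the variant
# above can be inspected; the accessors used are the ones the parsers rely on.
def summarize_job(jfile):
    job = parsefile(jfile)
    print "Job ID              =", job.get_jobid()
    print "Status              =", job.get_status()
    print "Maps (ok/failed)    =", job.get_fin_mapjobs(), "/", job.get_fail_mapjobs()
    print "Reduces (ok/failed) =", job.get_fin_redjobs(), "/", job.get_fail_redjobs()
    print "HDFS read/written   =", job.get_hdfs_read(), "/", job.get_hdfs_write()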
def parsefile(jfile):
    lno = 0
    try:
        jf = open(jfile, "r")
    except:
        print >> sys.stderr, "Unexpected error opening", jfile
        sys.exit(1)
    job = HadoopJobClass()
    for line in jf:
        lno += 1
        tasktype = line.split(" ", 1)
        if tasktype[0] == "Job":
            analyze_job(job, tasktype[1])
        elif tasktype[0] == "MapAttempt" or tasktype[0] == "ReduceAttempt":
            result = parsenamevalue(tasktype[1])
            if result.has_key("TASK_ATTEMPT_ID"):
                idx = result["TASK_ATTEMPT_ID"]
                # Reuse the attempt's task object if it is already in the
                # module-level task list; otherwise create and append it.
                t = None
                for existing in task:
                    if idx in existing.tid:
                        t = existing
                        break
                if t is None:
                    t = tasks.taskclass()
                    t.set_taskid(idx)
                    task.append(t)
                tasklist = tasks.analyze_task(tasktype[0], result, t)
        elif tasktype[0] == "Task":
            pass
        else:
            print >> sys.stderr, "Unknown Task", tasktype[0], " line number =", lno

    # Job-level timing: start/end times are milliseconds since the epoch.
    stime = job.get_stime()
    etime = job.get_etime()
    status = job.get_status()
    st = time.localtime(stime / 1000)
    et = time.localtime(etime / 1000)
    diff = (datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5])
            - datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5]))
    smaps = job.get_fin_mapjobs()
    fmaps = job.get_fail_mapjobs()
    sreds = job.get_fin_redjobs()
    freds = job.get_fail_redjobs()
    (mhdfs_read, mhdfs_write, mfs_read, mfs_write,
     rhdfs_read, rhdfs_write, rfs_read, rfs_write) = print_fs_stats(task)
    print "Status =", status
    if status == "SUCCESS":
        print "Log File =", jfile
        print "Job ID =", job.get_jobid()
        print "Job Status =", status
        print "Start Time =", datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5]).strftime("%Y/%m/%d-%H:%M:%S"), "(", stime, ")"
        print "End Time =", datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5]).strftime("%Y/%m/%d-%H:%M:%S"), "(", etime, ")"
        print "Time taken =", diff
        print "Completed Maps =", smaps
        print "Completed Reduces =", sreds
        print "Failed Maps =", fmaps
        print "Failed Reduces =", freds
        print "HDFS bytes read(Map,Reduce,Total) =", mhdfs_read, rhdfs_read, job.get_hdfs_read()
        print "HDFS bytes Written(Map,Reduce,Total) =", mhdfs_write, rhdfs_write, job.get_hdfs_write()
        print "FS bytes read(Map,Reduce,Total) =", mfs_read, rfs_read, job.get_fs_read()
        print "FS bytes Written(Map,Reduce,Total) =", mfs_write, rfs_write, job.get_fs_write()
        if verbose == 1:
            print_tasks(task)
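# Hedged sketch, not part of the original scripts: the start/end timestamps in
# the job history are epoch milliseconds, so the localtime-tuple round trip
# above is equivalent to building the datetimes directly (the helper name
# elapsed_seconds is illustrative; it uses the datetime module imported above).
def elapsed_seconds(stime_ms, etime_ms):
    """Return whole seconds between two epoch-millisecond timestamps."""
    st = datetime.datetime.fromtimestamp(stime_ms / 1000)
    et = datetime.datetime.fromtimestamp(etime_ms / 1000)
    delta = et - st
    # Same arithmetic the CSV variant uses for its "Time taken (Seconds)" column.
    return delta.days * 24 * 3600 + delta.seconds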
def parsefile(jfile, cfile, print_detailed, print_summary):
    lno = 0
    try:
        jf = open(jfile, 'r')
    except:
        print >> sys.stderr, "Unexpected error opening", jfile
        sys.exit(1)
    # Pull mapred.job.name out of the job configuration XML (cfile).
    command = ("/bin/cat " + cfile +
               " | /bin/grep mapred.job.name | /usr/bin/xml_grep value --text_only")
    p = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    mapred_job_name = p.stdout.read()
    job = HadoopJobClass()
    for line in jf:
        lno += 1
        tasktype = line.split(" ", 1)
        if tasktype[0] == "Job":
            analyze_job(job, tasktype[1])
        elif tasktype[0] == "MapAttempt" or tasktype[0] == "ReduceAttempt":
            result = parsenamevalue(tasktype[1])
            if result.has_key("TASK_ATTEMPT_ID"):
                idx = result["TASK_ATTEMPT_ID"]
                # Reuse the attempt's task object if it is already in the
                # module-level task list; otherwise create and append it.
                t = None
                for existing in task:
                    if idx in existing.tid:
                        t = existing
                        break
                if t is None:
                    t = tasks.taskclass()
                    t.set_taskid(idx)
                    task.append(t)
                tasklist = tasks.analyze_task(tasktype[0], result, t)
        elif tasktype[0] == "Task":
            pass
        else:
            pass  # print >> sys.stderr, "Unknown Task", tasktype[0], " line number =", lno

    stime = job.get_stime()
    etime = job.get_etime()
    status = job.get_status()
    st = time.localtime(stime / 1000)
    et = time.localtime(etime / 1000)
    diff = (datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5])
            - datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5]))
    smaps = job.get_fin_mapjobs()
    fmaps = job.get_fail_mapjobs()
    sreds = job.get_fin_redjobs()
    freds = job.get_fail_redjobs()
    (mhdfs_read, mhdfs_write, mfs_read, mfs_write,
     rhdfs_read, rhdfs_write, rfs_read, rfs_write) = print_fs_stats(task)

    # Build one CSV header row and one CSV value row for the job summary.
    header = []
    values = []
    header.append("Status")
    values.append(status)
    if status == "SUCCESS" and print_summary == "1":
        header.append(",mapred.job.name")
        values.append("," + str(mapred_job_name).rstrip('\n'))
        header.append(",Log File")
        values.append("," + jfile)
        header.append(",Job ID")
        values.append("," + job.get_jobid())
        header.append(",Start Time")
        values.append("," + datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5]).strftime("%m/%d/%Y %H:%M:%S"))
        header.append(",End Time")
        values.append("," + datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5]).strftime("%m/%d/%Y %H:%M:%S"))
        header.append(",Time taken (Seconds)")
        values.append("," + str(diff.seconds + diff.days * 24 * 3600))
        header.append(",Map Task Time (Seconds)")
        values.append("," + str(get_totaltask_time(task, "MAP")))
        header.append(",Reduce Task Time (Seconds)")
        values.append("," + str(get_totaltask_time(task, "REDUCE")))
        header.append(",Completed Maps")
        values.append("," + smaps)
        header.append(",Completed Reduces")
        values.append("," + sreds)
        header.append(",Failed Maps")
        values.append("," + fmaps)
        header.append(",Failed Reduces")
        values.append("," + freds)
        header.append(",HDFS bytes read(Map)")
        values.append("," + str(mhdfs_read))
        header.append(",HDFS bytes read(Reduce)")
        values.append("," + str(rhdfs_read))
        header.append(",HDFS bytes read(Total)")
        values.append("," + str(job.get_hdfs_read()))
        header.append(",HDFS bytes written(Map)")
        values.append("," + str(mhdfs_write))
        header.append(",HDFS bytes written(Reduce)")
        values.append("," + str(rhdfs_write))
        header.append(",HDFS bytes written(Total)")
        values.append("," + str(job.get_hdfs_write()))
        header.append(",FS bytes read(Map)")
        values.append("," + str(mfs_read))
        header.append(",FS bytes read(Reduce)")
        values.append("," + str(rfs_read))
        header.append(",FS bytes read(Total)")
        values.append("," + str(job.get_fs_read()))
        header.append(",FS bytes written(Map)")
        values.append("," + str(mfs_write))
        header.append(",FS bytes written(Reduce)")
        values.append("," + str(rfs_write))
        header.append(",FS bytes written(Total)")
        values.append("," + str(job.get_fs_write()))
        header.append(",MAP_START")
        values.append("," + str(get_starttime_string(task, "MAP")))
        header.append(",MAP_END")
        values.append("," + str(get_endtime_string(task, "MAP")))
        header.append(",REDUCE_START")
        values.append("," + str(get_starttime_string(task, "REDUCE")))
        header.append(",REDUCE_END")
        values.append("," + str(get_endtime_string(task, "REDUCE")))
        header.append(",MAP_TIME_CLOCK(s)")
        values.append("," + str(get_deltatimes(task, "MAP")))
        header.append(",REDUCE_TIME_CLOCK(s)")
        values.append("," + str(get_deltatimes(task, "REDUCE")))
        if print_header == "1":
            print ''.join(header)
        print ''.join(values)

    jobid = job.get_jobid()
    jobname = str(mapred_job_name).rstrip('\n')

    # Older plain-text report, deliberately disabled by the "and 0".
    if status == "SUCCESS" and 0:
        print "Log File =", jfile
        print "Job ID =", job.get_jobid()
        print "Job Status =", status
        print "Start Time =", datetime.datetime(st[0], st[1], st[2], st[3], st[4], st[5]).strftime("%Y/%m/%d-%H:%M:%S"), "(", stime, ")"
        print "End Time =", datetime.datetime(et[0], et[1], et[2], et[3], et[4], et[5]).strftime("%Y/%m/%d-%H:%M:%S"), "(", etime, ")"
        print "Time taken =", diff
        print "Completed Maps =", smaps
        print "Completed Reduces =", sreds
        print "Failed Maps =", fmaps
        print "Failed Reduces =", freds
        print "HDFS bytes read(Map,Reduce,Total) =", mhdfs_read, rhdfs_read, job.get_hdfs_read()
        print "HDFS bytes Written(Map,Reduce,Total) =", mhdfs_write, rhdfs_write, job.get_hdfs_write()
        print "FS bytes read(Map,Reduce,Total) =", mfs_read, rfs_read, job.get_fs_read()
        print "FS bytes Written(Map,Reduce,Total) =", mfs_write, rfs_write, job.get_fs_write()

    if verbose == 1 and print_detailed == "1":
        print_tasks(task, jobid, jobname)
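# Hedged usage sketch for the CSV-summary variant above; the file names below
# are illustrative assumptions, not paths from the source. The job history log
# and the matching job configuration XML (the cfile that mapred.job.name is
# grepped out of) are passed together, and the module-level globals the
# function relies on (task, print_header, verbose) must be set up first.
if __name__ == "__main__":
    task = []            # attempt list shared with print_fs_stats()/print_tasks()
    print_header = "1"   # emit the CSV header row before the value row
    verbose = 1
    parsefile("job_201101250910_0001.log", "job_201101250910_0001_conf.xml",
              print_detailed="1", print_summary="1")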