def get_jobs(self):
    """Initialize the jobs dictionary (``self.job_list``).

    Fetches the jobs page via ``getJobsHtml()``, keeps the markup between
    the ``running_jobs`` and ``completed_jobs`` headings, registers a
    ``Job`` for every anchor text found there, and then fills in
    ``map_num`` / ``reduce_num`` from each job row.

    Rows that cannot be parsed are reported on stdout and skipped; the
    method itself returns nothing.
    """
    html_result = getJobsHtml()
    running_html = re.findall(
        r'<h2.*?id="running_jobs">[\s\S]*?<h2.*?id="completed_jobs">',
        html_result)
    # NOTE(review): this is the str() of the findall() *list*, so newlines
    # inside the matched markup appear escaped. The row pattern below may
    # rely on that -- do not replace with a join without checking getList().
    running_text = str(running_html)

    # Job ids: every anchor text in the running section becomes a key.
    soup = BeautifulSoup(running_text)
    for anchor in soup.find_all("a"):
        job_id = anchor.string
        if job_id is None:
            # .string is None when the anchor has no single text child;
            # registering a Job under the key None would be meaningless.
            continue
        if job_id not in self.job_list:
            self.job_list[job_id] = Job()
            self.job_list[job_id].job_id = job_id

    # Map / reduce task counts: one match per job row.
    mr_details = getList(u'<td id="job_.*?<td>NA<\\/td><\\/tr>',
                         running_text.strip())
    for mr_detail in mr_details:
        row_text = str(mr_detail).strip()
        job_ids = getList(u".*jobid=([^&]+).*", row_text)
        mr_infos = getList(u"<td>(.*?)</td>", row_text)
        try:
            job_id = job_ids[0]
            # The 2nd and 5th plain <td> cells of the row hold the counts.
            map_num, reduce_num = mr_infos[1], mr_infos[4]
            if job_id not in self.job_list:
                print("job %s not in dict" % job_id)
            else:
                self.job_list[job_id].map_num = int(map_num)
                self.job_list[job_id].reduce_num = int(reduce_num)
        except Exception as e:
            # Best effort: a malformed row must not abort the whole scan.
            print(e)
            print("error : %s " % str(mr_detail))
def get_jobs(self):
    """Initialize the jobs dictionary (``self.job_list``).

    Fetches the jobs page via ``getJobsHtml()``, keeps the markup between
    the ``running_jobs`` and ``completed_jobs`` headings, registers a
    ``Job`` for every anchor text found there, and then fills in
    ``map_num`` / ``reduce_num`` from each job row.

    Rows that cannot be parsed are reported on stdout and skipped; the
    method itself returns nothing.
    """
    html_result = getJobsHtml()
    running_html = re.findall(
        r'<h2.*?id="running_jobs">[\s\S]*?<h2.*?id="completed_jobs">',
        html_result)
    # NOTE(review): this is the str() of the findall() *list*, so newlines
    # inside the matched markup appear escaped. The row pattern below may
    # rely on that -- do not replace with a join without checking getList().
    running_text = str(running_html)

    # Job ids: every anchor text in the running section becomes a key.
    soup = BeautifulSoup(running_text)
    for anchor in soup.find_all("a"):
        job_id = anchor.string
        if job_id is None:
            # .string is None when the anchor has no single text child;
            # registering a Job under the key None would be meaningless.
            continue
        if job_id not in self.job_list:
            self.job_list[job_id] = Job()
            self.job_list[job_id].job_id = job_id

    # Map / reduce task counts: one match per job row.
    mr_details = getList(u'<td id="job_.*?<td>NA<\\/td><\\/tr>',
                         running_text.strip())
    for mr_detail in mr_details:
        row_text = str(mr_detail).strip()
        job_ids = getList(u".*jobid=([^&]+).*", row_text)
        mr_infos = getList(u"<td>(.*?)</td>", row_text)
        try:
            job_id = job_ids[0]
            # The 2nd and 5th plain <td> cells of the row hold the counts.
            map_num, reduce_num = mr_infos[1], mr_infos[4]
            if job_id not in self.job_list:
                print("job %s not in dict" % job_id)
            else:
                self.job_list[job_id].map_num = int(map_num)
                self.job_list[job_id].reduce_num = int(reduce_num)
        except Exception as e:
            # Best effort: a malformed row must not abort the whole scan.
            print(e)
            print("error : %s " % str(mr_detail))
def get_jobs_conf(self):
    """Fetch the Hive SQL (``hive.query.string``) of every known job.

    For each key in ``self.job_list`` the job's configuration page is
    fetched with ``getJobsHtml('job_conf', job_id)`` and searched with two
    patterns in turn. The first match, with newlines removed, is stored in
    the job's ``hive_sql`` attribute; if neither pattern matches, the
    placeholder ``"no hive sql"`` is stored instead.
    """
    # Tried in order; the first pattern that yields a match wins.
    sql_patterns = (
        u"hive.query.string.*<td.*>([\\s\\S]+)\\s</td>",
        (u"hive.query.string.*<td.*>([\\s\\S]+)</td>[\\s\\S]+"
         u"<td width=\"35%\"><b>mapred.working.dir"),
    )
    for job_id in self.job_list:
        job_conf_text = str(getJobsHtml('job_conf', job_id))
        hive_sql = "no hive sql"
        for pattern in sql_patterns:
            sql_info = getList(pattern, job_conf_text)
            if sql_info:
                hive_sql = sql_info[0].replace('\n', '')
                break
        self.job_list[job_id].hive_sql = hive_sql
def get_jobs_conf(self):
    """Fetch the Hive SQL (``hive.query.string``) of every known job.

    For each key in ``self.job_list`` the job's configuration page is
    fetched with ``getJobsHtml('job_conf', job_id)`` and searched with two
    patterns in turn. The first match, with newlines removed, is stored in
    the job's ``hive_sql`` attribute; if neither pattern matches, the
    placeholder ``"no hive sql"`` is stored instead.
    """
    # Tried in order; the first pattern that yields a match wins.
    sql_patterns = (
        u"hive.query.string.*<td.*>([\\s\\S]+)\\s</td>",
        (u"hive.query.string.*<td.*>([\\s\\S]+)</td>[\\s\\S]+"
         u"<td width=\"35%\"><b>mapred.working.dir"),
    )
    for job_id in self.job_list:
        job_conf_text = str(getJobsHtml('job_conf', job_id))
        hive_sql = "no hive sql"
        for pattern in sql_patterns:
            sql_info = getList(pattern, job_conf_text)
            if sql_info:
                hive_sql = sql_info[0].replace('\n', '')
                break
        self.job_list[job_id].hive_sql = hive_sql
def get_jobs_detail(self):
    """Fetch the detailed configuration of every known job.

    For each key in ``self.job_list`` the job's detail page is fetched with
    ``getJobsHtml('job_detail', job_id)`` and the values found between
    ``</b>`` and ``<br>`` are extracted. Eleven values are read as
    (user, name, job file, submit host, submit address, two unused values,
    status, start time, running time, job cleanup). Ten values are read as
    the same layout without the name, in which case the job id is used as
    the job name.

    Sets ``job_name``, ``submit_host``, ``submit_address``, ``start_time``
    and ``running_time`` on the job. Any other number of values is reported
    on stdout and the job is left untouched.
    """
    for job_id in self.job_list:
        job_detail_html = getJobsHtml('job_detail', job_id)
        job_detail = getList(u"<\\/b>(.*?)<br>",
                             str(job_detail_html).strip())

        fields = list(job_detail)
        if len(fields) == 10:
            # No name on the page: fall back to the job id as the name so
            # both layouts can be unpacked the same way.
            fields.insert(1, job_id)
        if len(fields) != 11:
            print("len != 11")
            print("error : %s " % str(job_detail))
            continue

        (_user, name, _job_file, sm_host, sm_address, _no_1, _no_2,
         _status, start_time, run_time, _job_cleanup) = fields
        job = self.job_list[job_id]
        job.job_name = name
        job.submit_host = sm_host
        job.submit_address = sm_address
        job.start_time = start_time
        job.running_time = run_time
def get_jobs_detail(self):
    """Fetch the detailed configuration of every known job.

    For each key in ``self.job_list`` the job's detail page is fetched with
    ``getJobsHtml('job_detail', job_id)`` and the values found between
    ``</b>`` and ``<br>`` are extracted. Eleven values are read as
    (user, name, job file, submit host, submit address, two unused values,
    status, start time, running time, job cleanup). Ten values are read as
    the same layout without the name, in which case the job id is used as
    the job name.

    Sets ``job_name``, ``submit_host``, ``submit_address``, ``start_time``
    and ``running_time`` on the job. Any other number of values is reported
    on stdout and the job is left untouched.
    """
    for job_id in self.job_list:
        job_detail_html = getJobsHtml('job_detail', job_id)
        job_detail = getList(u"<\\/b>(.*?)<br>",
                             str(job_detail_html).strip())

        fields = list(job_detail)
        if len(fields) == 10:
            # No name on the page: fall back to the job id as the name so
            # both layouts can be unpacked the same way.
            fields.insert(1, job_id)
        if len(fields) != 11:
            print("len != 11")
            print("error : %s " % str(job_detail))
            continue

        (_user, name, _job_file, sm_host, sm_address, _no_1, _no_2,
         _status, start_time, run_time, _job_cleanup) = fields
        job = self.job_list[job_id]
        job.job_name = name
        job.submit_host = sm_host
        job.submit_address = sm_address
        job.start_time = start_time
        job.running_time = run_time