コード例 #1
0
ファイル: scavenger.py プロジェクト: Jerrick/Scavenger
    def get_jobs(self):
        '''Initialize the jobs dictionary (self.job_list) from the jobs page.

        Fetches the page via getJobsHtml(), isolates the markup between the
        "running_jobs" and "completed_jobs" <h2> headings, and then:

        1. creates a Job entry (keyed by, and carrying, the anchor text as
           job_id) for every <a> tag not already present in self.job_list;
        2. for every table row matched in that section, stores the second
           and fifth attribute-less <td> cells as the integer map_num and
           reduce_num of the job named by the row's "jobid=" parameter.

        Rows that cannot be parsed are reported on stdout and skipped.
        The syntax used here is valid on both Python 2.6+ and Python 3.
        '''
        page_html = getJobsHtml()
        running_html = re.findall(
            r'<h2.*?id="running_jobs">[\s\S]*?<h2.*?id="completed_jobs">',
            page_html)
        # NOTE: re.findall returns a list, so this is the text of that
        # list's repr rather than the raw HTML. It is kept on purpose: the
        # patterns below are applied to this text, and feeding them
        # something else could change what they match.
        running_text = str(running_html)

        # Step 1: register every job id that appears as anchor text.
        soup = BeautifulSoup(running_text)
        for anchor in soup.find_all("a"):
            job_id = anchor.string
            if job_id not in self.job_list:
                self.job_list[job_id] = Job()
                self.job_list[job_id].job_id = job_id

        # Step 2: read the map / reduce counts from each matched row.
        rows = getList(u'<td id="job_.*?<td>NA<\\/td><\\/tr>',
                       running_text.strip())
        for row in rows:
            row_text = str(row).strip()
            job_ids = getList(u".*jobid=([^&]+).*", row_text)
            cells = getList(u"<td>(.*?)</td>", row_text)
            try:
                job_id = job_ids[0]
                map_num, reduce_num = cells[1], cells[4]
                if job_id not in self.job_list:
                    print("job %s not in dict" % job_id)
                else:
                    self.job_list[job_id].map_num = int(map_num)
                    self.job_list[job_id].reduce_num = int(reduce_num)
            except Exception as e:
                # Best effort: a malformed row (missing cells, non-numeric
                # counts) is reported and must not abort the whole scan.
                print(e)
                print("error : %s " % str(row))
コード例 #2
0
    def get_jobs(self):
        '''Initialize the jobs dictionary held in self.job_list.

        The page returned by getJobsHtml() is cut down to the part between
        the <h2 id="running_jobs"> and <h2 id="completed_jobs"> headings.
        Every <a> tag in that part yields a job id (its text); ids not yet
        in self.job_list get a fresh Job whose job_id attribute is set.
        Each matched table row then supplies map_num and reduce_num (the
        second and fifth attribute-less <td> cells, converted with int())
        for the job named by the row's "jobid=" parameter.

        Parsing problems in a row are printed and the row is skipped.
        Written so that it runs unchanged on Python 2.6+ and Python 3.
        '''
        html_result = getJobsHtml()
        running_html = re.findall(
            r'<h2.*?id="running_jobs">[\s\S]*?<h2.*?id="completed_jobs">',
            html_result)
        # re.findall gives a list; str() of it is the list's repr. The
        # original code parsed that repr, so it is preserved as the input
        # for everything below.
        section_text = str(running_html)

        # Collect job ids from the anchors.
        soup = BeautifulSoup(section_text)
        for link in soup.find_all("a"):
            job_id = link.string
            if job_id in self.job_list:
                continue
            self.job_list[job_id] = Job()
            self.job_list[job_id].job_id = job_id

        # Collect map / reduce counts from the table rows.
        mr_details = getList(u'<td id="job_.*?<td>NA<\\/td><\\/tr>',
                             section_text.strip())
        for mr_detail in mr_details:
            detail_text = str(mr_detail).strip()
            job_ids = getList(u".*jobid=([^&]+).*", detail_text)
            mr_infos = getList(u"<td>(.*?)</td>", detail_text)
            try:
                job_id = job_ids[0]
                map_num, reduce_num = mr_infos[1], mr_infos[4]
                if job_id in self.job_list:
                    self.job_list[job_id].map_num = int(map_num)
                    self.job_list[job_id].reduce_num = int(reduce_num)
                else:
                    print("job %s not in dict" % job_id)
            except Exception as e:
                # Deliberately broad: one bad row should only be logged.
                print(e)
                print("error : %s " % str(mr_detail))
コード例 #3
0
ファイル: scavenger.py プロジェクト: Jerrick/Scavenger
 def get_jobs_conf(self):
     '''Get each job's hive.sql and store it on the job as hive_sql.

     For every job id in self.job_list the 'job_conf' page is fetched
     with getJobsHtml() and searched with two patterns, in order, for
     the text following "hive.query.string". The first match of the
     first pattern that finds anything is stored with its newlines
     removed. If neither pattern matches, the placeholder string
     "no hive sql" is stored instead.
     '''
     # Tried in order; the second is the fallback, anchored on the
     # "mapred.working.dir" cell that follows the query text.
     patterns = (
         u"hive.query.string.*<td.*>([\\s\\S]+)\\s</td>",
         u'hive.query.string.*<td.*>([\\s\\S]+)</td>[\\s\\S]+'
         u'<td width="35%"><b>mapred.working.dir',
     )
     for job_id in self.job_list:
         conf_text = str(getJobsHtml('job_conf', job_id))
         hive_sql = "no hive sql"
         for pattern in patterns:
             matches = getList(pattern, conf_text)
             if matches:
                 hive_sql = matches[0].replace('\n', '')
                 break
         self.job_list[job_id].hive_sql = hive_sql
コード例 #4
0
 def get_jobs_conf(self):
     '''Get the hive.sql of every job in self.job_list.

     The 'job_conf' page of each job (from getJobsHtml()) is searched
     for the text after "hive.query.string". A primary pattern is tried
     first, then a fallback that additionally requires a following
     "mapred.working.dir" cell. The first captured match, with newlines
     stripped out, is written to the job's hive_sql attribute; when
     nothing matches, hive_sql is set to "no hive sql".
     '''
     primary = u"hive.query.string.*<td.*>([\\s\\S]+)\\s</td>"
     fallback = (u'hive.query.string.*<td.*>([\\s\\S]+)</td>[\\s\\S]+'
                 u'<td width="35%"><b>mapred.working.dir')
     for job_id in self.job_list:
         conf_text = str(getJobsHtml('job_conf', job_id))
         sql_info = getList(primary, conf_text)
         if not sql_info:
             # The primary pattern found nothing; try the fallback.
             sql_info = getList(fallback, conf_text)
         if sql_info:
             hive_sql = sql_info[0].replace('\n', '')
         else:
             hive_sql = "no hive sql"
         self.job_list[job_id].hive_sql = hive_sql
コード例 #5
0
ファイル: scavenger.py プロジェクト: Jerrick/Scavenger
 def get_jobs_detail(self):
     '''Get the detailed configuration of every job in self.job_list.

     The 'job_detail' page of each job (from getJobsHtml()) is scanned
     for the values between "</b>" and "<br>". Two layouts are accepted:

     * 11 values: the second value is stored as job_name;
     * 10 values: there is no name value, so job_id is used as job_name.

     In both layouts the submit host, submit address, start time and
     running time are copied onto the job. Any other number of values
     is reported on stdout and that job is left untouched.
     The syntax used is valid on both Python 2.6+ and Python 3.
     '''
     for job_id in self.job_list:
         detail_html = getJobsHtml('job_detail', job_id)
         fields = getList(u"<\\/b>(.*?)<br>", str(detail_html).strip())
         field_count = len(fields)
         if field_count not in (10, 11):
             print("len != 11")
             print("error : %s " % str(fields))
             continue

         # The 11-value layout only adds a name at index 1, so the other
         # values sit at the same offsets from the end in both layouts.
         # Assumes getList returns an indexable sequence (TODO confirm).
         job = self.job_list[job_id]
         job.job_name = fields[1] if field_count == 11 else job_id
         job.submit_host = fields[-8]
         job.submit_address = fields[-7]
         job.start_time = fields[-3]
         job.running_time = fields[-2]
コード例 #6
0
 def get_jobs_detail(self):
     '''Get the detailed configuration of each job.

     For every job id in self.job_list, fetches the 'job_detail' page
     with getJobsHtml() and extracts the values found between "</b>"
     and "<br>". With 11 values the second one becomes job_name; with
     10 values no name is present and job_id is used as job_name. The
     submit host, submit address, start time and running time are then
     stored on the job. Any other value count is printed as an error
     and the job is skipped.
     Compatible with both Python 2.6+ and Python 3.
     '''
     for job_id in self.job_list:
         job_detail_html = getJobsHtml('job_detail', job_id)
         job_detail = getList(u"<\\/b>(.*?)<br>",
                              str(job_detail_html).strip())
         # Underscore-prefixed names are values the method does not use.
         if len(job_detail) == 11:
             (_user, name, _jf, sm_host, sm_address, _no_1, _no_2,
              _status, start_time, run_time, _jb_clean) = job_detail
         elif len(job_detail) == 10:
             (_user, _jf, sm_host, sm_address, _no_1, _no_2,
              _status, start_time, run_time, _jb_clean) = job_detail
             # No name value in this layout; use the job id as the name.
             name = job_id
         else:
             print("len != 11")
             print("error : %s " % str(job_detail))
             continue

         job = self.job_list[job_id]
         job.job_name = name
         job.submit_host = sm_host
         job.submit_address = sm_address
         job.start_time = start_time
         job.running_time = run_time