Python subjobの例

プログラミング言語: Python

名前空間/パッケージ名: bigjob.bigjob_manager

メソッド/関数: subjob

hotexamples.comのコード掲載数: 5

Python subjob - 5件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのbigjob.bigjob_manager.subjobの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

    def reduce_job_submit(self):
        ##########################################################################################
        part_list_argument = []
        jobs = []
        job_start_times = {}
        job_states = {}

        print " moving files .... to output location.... " + saga.url(
            self.__output_dir).path
        file_transfer_time = time.time()
        self.__sorted_partition_file_names = glob.glob(
            saga.url(self.__tmp_dir).path + "/*sorted-part-*")

        file_transfer_size = 0
        for u in self.__sorted_partition_file_names:
            #print " >>> Partition to be moved ...  " + u + " to " + self.__output_dir
            part_file = saga.filesystem.file(u)
            file_transfer_size = file_transfer_size + part_file.get_size()
            part_file.move(self.__output_dir)

        file_transfer_comp = time.time() - file_transfer_time
        print " Time taken to transfer partition files to output location " + str(
            round(file_transfer_comp, 3)) + "\n\n"
        print " The total file size transferred is(MB): " + str(
            round((file_transfer_size / (1024.0 * 1024.0)), 3)) + "\n\n"

        for i in range(0, self.__nbr_reduces):
            part_list_string = ""
            for u in self.__sorted_partition_file_names:
                file_name = (os.path.split(u))[1]
                if str(u.split("-")[-1:][0]) == str(i):
                    part_list_string = part_list_string + ":" + saga.url(
                        self.__output_dir).path + "/" + file_name
            part_list_argument.append(part_list_string)

        for i in part_list_argument:
            print " The argument list is " + i
            # create job description
            try:
                jd2 = saga.job.description()
                jd2.executable = self.__reducer
                jd2.number_of_processes = self.npworkers
                jd2.spmd_variation = "single"
                jd2.arguments = [i] + self.redarg
                jd2.working_directory = saga.url(self.__output_dir).path
                jd2.output = "stdout_reduce" + str(self.__nbr_reduce_jobs)
                jd2.error = "stderr-reduce" + str(self.__nbr_reduce_jobs)
                sj = subjob()
                sj.submit_job(self.__bj.pilot_url, jd2)
                print "Submited Reduce sub-job " + str(self.__nbr_reduce_jobs)
                jobs.append(sj)
                job_start_times[sj] = time.time()
                job_states[sj] = sj.get_state()
                self.__nbr_reduce_jobs = self.__nbr_reduce_jobs + 1
            except:
                #traceback.print_exc(file=sys.stdout)
                print " Reduce Job failed. Cancelling bigjob......"
                self.__bj.cancel()
                sys.exit(0)
                try:
                    self.__bj.cancel()
                except:
                    pass

        print "************************ All Reduce Jobs submitted ************************"

        print " Reduce subjobs created "
        # Wait for task completion of map tasks - synchronization

        ############################################################################################
        # Wait for task completion of map tasks - synchronization
        wait_for_all_jobs(jobs, job_start_times, job_states, 5)
        ############################################################################################

        self.__bj.cancel()

コード例 #2

ファイルを表示

    def map_job_submit(self):
        ##########################################################################################
        print " >>> Starting BigJob ..................... \n"
        jobs = []
        job_start_times = {}
        job_states = {}
        print " >>> Create bigjob with advert service at ... ", self.advert_host + "\n"

        print " >> BigJob parameters " + self.advert_host + "\n"
        print " >> resource url " + self.resource_url + "\n"
        print " >> Number of processes " + str(self.number_of_processes) + "\n"
        print " >> Queue " + str(self.queue) + "\n"
        print " >> Allocation " + str(self.allocation) + "\n"
        print " >> Working directory" + self.workingdirectory + "\n"
        print " >> userproxy " + str(self.userproxy) + "\n"
        print " >> walltime " + str(self.walltime) + "\n"
        print " >> ppn " + str(self.ppn) + "\n"

        self.__bj = bigjob(self.advert_host)
        self.__bj.start_pilot_job(self.resource_url, None,
                                  self.number_of_processes, self.queue,
                                  self.allocation, self.workingdirectory,
                                  self.userproxy, self.walltime, self.ppn)
        i = 0
        for u in self.__chunk_list:
            uname = "-".join(u)
            i = i + 1
            print " >>> chunk path/name to be submitted to map subjob  " + uname

            # create job description
            try:

                jd = saga.job.description()
                jd.executable = self.__mapper
                jd.number_of_processes = self.npworkers
                jd.spmd_variation = "single"
                jd.arguments = u + [str(self.__nbr_reduces)] + self.maparg
                jd.working_directory = saga.url(self.__tmp_dir).path
                jd.output = "stdout-map" + str(i) + ".txt"
                jd.error = "stderr-map" + str(i) + ".txt"
                sj = subjob()
                sj.submit_job(self.__bj.pilot_url, jd)
                print "Submited sub-job " + uname + "."
                jobs.append(sj)
                job_start_times[sj] = time.time()
                job_states[sj] = sj.get_state()
            except:
                #traceback.print_exc(file=sys.stdout)
                print " Map Job failed. Cancelling bigjob......"
                self.__bj.cancel()
                sys.exit(0)
                try:
                    self.__bj.cancel()
                except:
                    pass

        print "************************ All Jobs submitted ************************"
        print " No of map subjobs created - " + str(len(jobs))
        # Wait for task completion of map tasks - synchronization

        ############################################################################################
        # Wait for task completion of map tasks - synchronization
        wait_for_all_jobs(jobs, job_start_times, job_states, 5)

コード例 #3

ファイルを表示

ファイル: mapreduce.py プロジェクト: saga-project/saga-cpp-legacy-projects

    def reduce_job_submit(self):
    ##########################################################################################        
        part_list_argument = []
        jobs = []
        job_start_times = {}
        job_states = {}

        print " moving files .... to output location.... " + saga.url(self.__output_dir).path
        file_transfer_time = time.time()
        self.__sorted_partition_file_names = glob.glob(saga.url(self.__tmp_dir).path +  "/*sorted-part-*")
            
        file_transfer_size = 0   
        for u in self.__sorted_partition_file_names:               
            #print " >>> Partition to be moved ...  " + u + " to " + self.__output_dir
            part_file =  saga.filesystem.file(u)
            file_transfer_size = file_transfer_size + part_file.get_size()
            part_file.move(self.__output_dir) 
        
        file_transfer_comp = time.time() - file_transfer_time
        print " Time taken to transfer partition files to output location " + str( round(file_transfer_comp,3)) + "\n\n"
        print " The total file size transferred is(MB): " + str ( round((file_transfer_size/(1024.0 * 1024.0)),3) ) + "\n\n"

        for i in range(0,self.__nbr_reduces):
            part_list_string =""
            for u in self.__sorted_partition_file_names:   
                file_name=(os.path.split(u))[1]
                if str(u.split("-")[-1:][0]) == str(i):
                    part_list_string = part_list_string + ":" + saga.url(self.__output_dir).path + "/" + file_name     
            part_list_argument.append(part_list_string)
        
        for i in part_list_argument:
            print " The argument list is " + i
            # create job description
            try:
                jd2 = saga.job.description()
                jd2.executable = self.__reducer
                jd2.number_of_processes = self.npworkers
                jd2.spmd_variation = "single"                
                jd2.arguments = [i] + self.redarg
                jd2.working_directory = saga.url(self.__output_dir).path
                jd2.output = "stdout_reduce" + str(self.__nbr_reduce_jobs) 
                jd2.error =  "stderr-reduce" + str(self.__nbr_reduce_jobs)  
                sj = subjob()
                sj.submit_job(self.__bj.pilot_url, jd2)
                print "Submited Reduce sub-job " + str(self.__nbr_reduce_jobs)
                jobs.append(sj)
                job_start_times[sj]=time.time()
                job_states[sj] = sj.get_state()
                self.__nbr_reduce_jobs = self.__nbr_reduce_jobs + 1
            except:
                #traceback.print_exc(file=sys.stdout)
                print " Reduce Job failed. Cancelling bigjob......"
                self.__bj.cancel()
                sys.exit(0)
                try:
                    self.__bj.cancel()
                except:
                    pass                      
            
        print "************************ All Reduce Jobs submitted ************************"

        print " Reduce subjobs created "
        # Wait for task completion of map tasks - synchronization      
        
        ############################################################################################
        # Wait for task completion of map tasks - synchronization    
        wait_for_all_jobs(jobs, job_start_times, job_states,  5)
        ############################################################################################
            
        self.__bj.cancel()

コード例 #4

ファイルを表示

def load_test(coordination_url, number_jobs, number_nodes, number_cores_per_node):
    
    print "\n**************************************************************************************************************************************************\n"
    print ("Start test scenario - #nodes:%d, #cores/node:%d, #jobs: %d, coordination-url:%s, lrms-url:%s"%
          (number_nodes, number_cores_per_node, number_jobs, coordination_url, LRMS_URL))
    print "\n**************************************************************************************************************************************************\n"      
    
    starttime=time.time()
    ##########################################################################################
    # Start BigJob
    # Parameter for BigJob
    lrms_url = LRMS_URL
    workingdirectory="/N/u/luckow/src/bigjob-performance/agent"  # working directory for agent
   
    # start pilot job (bigjob_agent)
    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(coordination_url)
    bj.start_pilot_job(lrms_url=lrms_url,
                       number_nodes=number_nodes,
                       processes_per_node=number_cores_per_node,
                       working_directory=workingdirectory
                      )
        
    queueing_time = None    
    subjob_submission_time = None    
    pilot_state = str(bj.get_state_detail())
    if pilot_state=="Running" and queueing_time==None:
            queueing_time=time.time()-starttime
            print "*** Pilot State: " + pilot_state + " queue time: " + str(queueing_time)
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + pilot_state

    ##########################################################################################
    # Submit SubJob through BigJob
    jobs = []
    job_start_times = {}
    job_states = {}
    for i in range(0, number_jobs):
        jd = saga.job.description()
        jd.executable = "/bin/date"
        jd.number_of_processes = "1"
        jd.spmd_variation = "single"
        jd.arguments = [""]
        jd.working_directory = os.getcwd() 
        jd.output = "sj-stdout-"+str(i)+".txt"
        jd.error = "sj-stderr-"+str(i)+".txt"

        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)
        jobs.append(sj)
        job_start_times[sj]=time.time()
        job_states[sj] = sj.get_state()

        if pilot_state != "Running":
            pilot_state = str(bj.get_state_detail())
            if pilot_state=="Running" and queueing_time==None:
                queueing_time=time.time()-starttime
                print "*** Pilot State: " + pilot_state + " queue time: " + str(queueing_time)

    subjob_submission_time = time.time()-starttime
    # busy wait for completion
    while 1:        
        pilot_state = str(bj.get_state_detail())
        if pilot_state=="Running" and queueing_time==None:
            queueing_time=time.time()-starttime
            print "*** Pilot State: " + pilot_state + " queue time: " + str(queueing_time)
        finish_counter=0
        result_map = {}
        for i in range(0, number_jobs):
            old_state = job_states[jobs[i]]
            state = jobs[i].get_state()
            if result_map.has_key(state)==False:
                result_map[state]=1
            else:
                result_map[state] = result_map[state]+1
            #pdb.set_trace()
            if old_state != state:
                print "Job " + str(jobs[i]) + " changed from: " + old_state + " to " + state
            if old_state != state and has_finished(state)==True:
                print "Job: " + str(jobs[i]) + " Runtime: " + str(time.time()-job_start_times[jobs[i]]) + " s."
            if has_finished(state)==True:
                finish_counter = finish_counter + 1                
            job_states[jobs[i]]=state

        print "Pilot State: %s; %d/%d jobs finished"%(pilot_state,finish_counter,number_jobs)
        if finish_counter >= number_jobs-1 or pilot_state == "Failed":
            break
        time.sleep(2)

    runtime = time.time()-starttime
    #print "Runtime: " + str(runtime) + " s; Runtime per Job: " + str(runtime/NUMBER_JOBS)
    ##########################################################################################
    # Cleanup - stop BigJob
    
    result = ("%d,%d,%d,%s,%s,%s,%s,%s"% 
             (number_nodes, number_cores_per_node, number_jobs, str(runtime), str(queueing_time),coordination_url, LRMS_URL,str(subjob_submission_time)))
    
    result_tab = ("%d\t%d\t%d\t%s\t%s\t%s\t%s"% 
             (number_nodes, number_cores_per_node, number_jobs, str(runtime), str(queueing_time), coordination_url, LRMS_URL))
    print ("#Nodes\t#cores/node\t#jobs\tRuntime\tQueuing Time\tCoordination URL\tLRMS URL")
    print result_tab
    
    bj.cancel()
    # hack: delete manually pbs jobs of user
    os.system("qstat -u `whoami` | grep -o ^[0-9]* |xargs qdel")
    #os.system("saga-advert remove_directory advert://advert.cct.lsu.edu:8080/bigjob")
    return result

コード例 #5

ファイルを表示

ファイル: mapreduce.py プロジェクト: saga-project/saga-cpp-legacy-projects

    def map_job_submit(self):
    ##########################################################################################
        print " >>> Starting BigJob ..................... \n"
        jobs = []
        job_start_times = {}
        job_states = {}
        print " >>> Create bigjob with advert service at ... " , self.advert_host + "\n"
        
        print " >> BigJob parameters " + self.advert_host + "\n" 
        print " >> resource url " + self.resource_url + "\n"
        print " >> Number of processes " +  str(self.number_of_processes) + "\n" 
        print " >> Queue " + str(self.queue) + "\n" 
        print " >> Allocation " +  str(self.allocation) + "\n" 
        print " >> Working directory" + self.workingdirectory + "\n" 
        print " >> userproxy " + str(self.userproxy) + "\n" 
        print " >> walltime " + str(self.walltime) + "\n" 
        print " >> ppn " + str(self.ppn) + "\n"
                                  
        self.__bj = bigjob(self.advert_host) 
        self.__bj.start_pilot_job( self.resource_url, None, self.number_of_processes, self.queue, self.allocation, 
                                   self.workingdirectory, self.userproxy, self.walltime, self.ppn)
        i=0
        for u in self.__chunk_list:          
            uname="-".join(u)
            i=i+1
            print " >>> chunk path/name to be submitted to map subjob  " + uname

            # create job description
            try:
                
                jd = saga.job.description()
                jd.executable = self.__mapper
                jd.number_of_processes = self.npworkers
                jd.spmd_variation = "single"                
                jd.arguments = u + [str(self.__nbr_reduces)] + self.maparg
                jd.working_directory = saga.url(self.__tmp_dir).path
                jd.output = "stdout-map" + str(i) + ".txt"
                jd.error = "stderr-map" + str(i) + ".txt"
                sj = subjob()
                sj.submit_job(self.__bj.pilot_url, jd)
                print "Submited sub-job " + uname + "."
                jobs.append(sj)
                job_start_times[sj]=time.time()
                job_states[sj] = sj.get_state()
            except:
                #traceback.print_exc(file=sys.stdout)
                print " Map Job failed. Cancelling bigjob......"
                self.__bj.cancel()
                sys.exit(0)
                try:
                    self.__bj.cancel()
                except:
                    pass  
            
        print "************************ All Jobs submitted ************************"
        print " No of map subjobs created - " + str(len(jobs))
        # Wait for task completion of map tasks - synchronization      
        
        ############################################################################################
        # Wait for task completion of map tasks - synchronization    
        wait_for_all_jobs(jobs, job_start_times,job_states, 5)