Example #1
    def submit_subjob(self, replica_id, jd):
        #######  submit job via pilot job ######
        # Replicas are mapped to pilot jobs in contiguous blocks of RPB;
        # ids beyond the third block run on the last pilot (index 3).
        k = max(0, min(replica_id // self.RPB, 3))
        sj = subjob()
        sj.submit_job(self.bjs[k].pilot_url, jd)
        self.job_start_times[sj] = time.time()
        self.job_states[sj] = sj.get_state()
        return sj
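A minimal sketch of the replica-to-pilot mapping the method above encodes; the standalone function and the RPB value of 2 are our own illustration, not part of the example:

def pilot_index(replica_id, rpb, num_pilots=4):
    # Hypothetical helper: replicas go to pilots in contiguous blocks of
    # size rpb; ids beyond the last block land on the last pilot.
    return max(0, min(replica_id // rpb, num_pilots - 1))

for r in range(10):
    print("replica %d -> pilot %d" % (r, pilot_index(r, 2)))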
Example #3
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=1
    number_of_processes = 1
    workingdirectory="." # working directory for agent
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

        
    #lrms_url = "ec2+ssh://localhost" # resource url to run on GCE
    lrms_url = "gce+ssh://locahost"
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/echo"
    #jd.executable = "$HOME/hello.sh"
    jd.number_of_processes = "1"
    jd.arguments = ["$HELLOWORLD"]
    jd.environment = ['HELLOWORLD=hello_world']
    jd.input_data = ["hi", "ho"]
    
    # specify an optional working directory if the sub-job should be executed outside of the bigjob sandbox
    #jd.working_directory = "/tmp" 
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
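The busy-wait loop above recurs throughout these examples. A hedged refactoring sketch; the helper name and default poll interval are our own, while get_state() is the subjob call the examples already use:

import time

def wait_for_subjob(sj, poll_interval=2):
    # Poll a BigJob subjob until it reaches a terminal state
    # ("Failed" or "Done", as the examples test for).
    while True:
        state = str(sj.get_state())
        print("state: " + state)
        if state in ("Failed", "Done"):
            return state
        time.sleep(poll_interval)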
Example #4
 def _submit_cu(self, compute_unit):
     """ Submits compute unit to Bigjob """
     logger.debug("Submit CU to big-job")
     sj = subjob()
     sj.submit_job(self.__bigjob.pilot_url, compute_unit.subjob_description)
     self.__subjobs.append(sj)
     compute_unit._update_subjob(sj)
     return compute_unit
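A call-flow sketch, assuming Pilot-API style usage: `manager` stands for whatever object defines _submit_cu(), and the dictionary key follows the ComputeUnit description shown further below; both names are assumptions, not confirmed by this snippet:

cu = ComputeUnit(compute_unit_description={"executable": "/bin/date"})  # assumed constructor
cu = manager._submit_cu(cu)  # `manager` is hypothetical; binds the new subjob to the CU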
Example #6
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = None  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 8
    number_nodes = 24
    workingdirectory = os.getcwd()  # working directory for agent
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "pbs://localhost"  # resource url to run the jobs on localhost

    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job(lrms_url, number_nodes, queue, project,
                       workingdirectory, userproxy, walltime,
                       processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
        bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/hostname"
    jd.number_of_processes = "2"
    jd.spmd_variation = "single"
    jd.arguments = [""]
    #jd.working_directory = "/tmp"
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"

    for i in range(0, 12):
        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.wait()
    bj.cancel()
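A hedged extension of the submission loop above: keeping the subjob handles allows polling the whole batch rather than relying on bj.wait() alone. `bj`, `jd`, and `subjob` are exactly as set up in the example:

jobs = []
for i in range(12):
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    jobs.append(sj)

# Snapshot of all subjob states at one point in time.
states = [str(sj.get_state()) for sj in jobs]
print("current states: " + ", ".join(states))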
Example #8
 def __submit_cu(self, compute_unit):
     """ Submits compute unit to Bigjob """
             
     if len(self.pilot_job_services)!=1:
         raise PilotError("No PilotComputeService found. Please start a PCS before submitting ComputeUnits.")
     
     sj = subjob()
     self.pcs_coordination_namespace=self.pilot_job_services[0].coordination_queue
     logger.debug("Submit CU to big-job via external queue: %s"%self.pcs_coordination_namespace)
     sj.submit_job(self.pcs_coordination_namespace, compute_unit.subjob_description)
     compute_unit._update_subjob(sj)
     return compute_unit
 def __init__(self, compute_unit_description=None, compute_data_service=None, cu_url=None):
     
     if cu_url==None:
         self.id = self.CU_ID_PREFIX + str(uuid.uuid1())
         if compute_data_service!=None:
             self.url = compute_data_service.url + "/" + self.id
             logger.debug("Created CU: %s"%self.url)  
         self.state = State.New       
         self.__subjob = None # reference to BigJob Subjob 
         self.compute_unit_description = compute_unit_description # CU Description
         self.subjob_description = self.__translate_cu_sj_description(compute_unit_description)
     else:
         self.__subjob = subjob(subjob_url=cu_url)
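A sketch of the two construction paths the __init__ above supports; the cu_url value is hypothetical, since its exact format depends on the coordination backend:

# Fresh CU: new id, state New, description translated to a subjob description.
cu_new = ComputeUnit(compute_unit_description={"executable": "/bin/date"})

# Re-bind to an already-submitted subjob (e.g. after a client restart);
# the URL below is a made-up example.
cu_old = ComputeUnit(cu_url="redis://localhost/bigjob:cu-1234")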
Example #10
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.get_url() + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jobs = []
    job_start_times = {}
    job_states = {}
    for i in range(0, NUMBER_JOBS):
        jd = description()
        jd.executable = "/bin/date"
        jd.number_of_processes = "1"
        jd.spmd_variation = "single"
        jd.arguments = [""]
        jd.output = "sj-stdout-"+str(i)+".txt"
        jd.error = "sj-stderr-"+str(i)+".txt"

        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)
        jobs.append(sj)
        job_start_times[sj]=time.time()
        job_states[sj] = sj.get_state()

    print "Terminating application. You can reconnect to BJ via the following URL: %s"%bj.get_url()

Example #12
    ##########################################################################################
    # Submit SubJob through BigJob
    jobs = []
    job_start_times = {}
    job_states = {}
    for i in range(0, NUMBER_JOBS):
        jd = saga.job.description()
        jd.executable = "/bin/date"
        jd.number_of_processes = "1"
        jd.spmd_variation = "single"
        jd.arguments = [""]
        jd.working_directory = os.getcwd()
        jd.output = "sj-stdout-" + str(i) + ".txt"
        jd.error = "sj-stderr" + str(i) + ".txt"

        sj = subjob(advert_host)
        sj.submit_job(bj.pilot_url, jd)
        jobs.append(sj)
        job_start_times[sj] = time.time()
        job_states[sj] = sj.get_state()

    # busy wait for completion
    while 1:
        finish_counter = 0
        result_map = {}
        for i in range(0, NUMBER_JOBS):
            old_state = job_states[jobs[i]]
            state = jobs[i].get_state()
            if state not in result_map:
                result_map[state] = 1
            else:
                result_map[state] = result_map[state] + 1
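A self-contained sketch of the state-counting idiom above; collections.Counter expresses the same dict bookkeeping in one call:

from collections import Counter

# Stand-in state strings; in the example these come from jobs[i].get_state().
states = ["Running", "Done", "Running", "Failed"]
result_map = Counter(states)
print(dict(result_map))  # e.g. {'Running': 2, 'Done': 1, 'Failed': 1}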
Example #13
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=4
    number_of_processes = 8
    workingdirectory= os.path.join(os.getcwd(), "agent")
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "condor://localhost"

    ##########################################################################################


    input_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "test.txt")
    bj_filetransfers = [input_file +" > test.txt"]
  
    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        None,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node,
                        bj_filetransfers)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()

    jd.executable = "/bin/cat"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    jd.arguments = ["test.txt"]
    #jd.working_directory = "" 
    jd.output = "sj-stdout.txt"
    jd.error = "sj-stderr.txt"    

    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        bj_state = bj.get_state()
        print "bj state: " + str(bj_state) + " state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
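A small pre-flight sketch (our own addition): make sure the staged input file exists before starting the pilot, since the "local_source > remote_target" transfer string above presumably cannot work without it:

import os

input_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "test.txt")
if not os.path.exists(input_file):
    # Create a placeholder so the stage-in has something to copy.
    with open(input_file, "w") as f:
        f.write("hello from the staging test\n")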
Example #14
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = "normal"  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 4
    number_of_processes = 8
    #workingdirectory=os.path.join(os.getcwd(), "agent")  # working directory for agent
    workingdirectory = "agent"
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:

    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.

    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "ssh://boskop"
    #lrms_url = "sge://localhost"
    #lrms_url = "fork://localhost"

    ##########################################################################################

    # for i in range(99999):
    #     js = saga.job.Service (lrms_url)
    #     j  = js.run_job ("/bin/sleep 1000")
    #     print "%4d: %s" % (i, j.state)

    for i in range(99999):
        print i

        print "Start Pilot Job/BigJob at: " + lrms_url
        bj = bigjob(COORDINATION_URL)
        bj.start_pilot_job(lrms_url, number_of_processes, queue, project,
                           workingdirectory, userproxy, walltime,
                           processes_per_node)

        print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
            bj.get_state())

        ##########################################################################################
        # Submit SubJob through BigJob
        jd = description()
        jd.executable = "/bin/echo"
        #jd.executable = "$HOME/hello.sh"
        jd.number_of_processes = "1"
        jd.arguments = ["$HELLOWORLD"]
        jd.environment = ['HELLOWORLD=hello_world']
        #jd.spmd_variation = "mpi"

        # specify an optional working directory if the sub-job should be executed outside of the bigjob sandbox
        #jd.working_directory = "/tmp"
        jd.output = "stdout.txt"
        jd.error = "stderr.txt"

        sj = subjob()
        sj.submit_job(bj.pilot_url, jd)

        #########################################
        # busy wait for completion
        while 1:
            state = str(sj.get_state())
            print "state: " + state
            if (state == "Failed" or state == "Done"):
                break
            time.sleep(2)

        ##########################################################################################
        # Cleanup - stop BigJob
        bj.cancel()
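The loop above is effectively a stress test. A hedged instrumentation sketch for timing each start/submit/cancel cycle; the helper is our own, not part of BigJob:

import time
from contextlib import contextmanager

@contextmanager
def timed(label):
    # Print how long the wrapped block took.
    t0 = time.time()
    yield
    print("%s took %.1f s" % (label, time.time() - t0))

# Hypothetical usage inside the loop body:
#   with timed("cycle %d" % i):
#       ...start_pilot_job / submit_job / cancel...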
Example #15
 sjs = []
 cpr = CPR
 for i in range(0, NUMBER_REPLICAS):
     stage_files(i)
     jd = saga.job.description()
     # jd.executable = "namd2"
     jd.number_of_processes = cpr
     jd.spmd_variation = "mpi"
     # jd.arguments = ["NPT.conf"]
     jd.working_directory = WORK_DIR + "agent/" + str(i) + "/"
     # os.system("cp NPT.conf NPT.conf")
     jd.arguments = ["NPT.conf"]
     jd.output = str(i) + "/stdout-" + str(i) + ".txt"
     jd.error = str(i) + "/stderr-" + str(i) + ".txt"
     jds.append(jd)
     sj = bigjob.subjob(advert_host)
     sjs.append(sj)
     # prepare config and scp other files to remote machine
     NAMD_config(i)
     if i < RPB:
         j = 0
         jd.executable = EXE
         copy_with_saga(i)
         sjs[i].submit_job(bjs[j].pilot_url, jds[i], str(i))
     elif i >= RPB and i < (2 * RPB):
         j = 1
         jd.executable = EXE1
         copy_with_saga(i)
         sjs[i].submit_job(bjs[j].pilot_url, jds[i], str(i))
     elif i >= (2 * RPB) and i < (3 * RPB):
         j = 2
         copy_with_saga(i)
         sjs[i].submit_job(bjs[j].pilot_url, jds[i], str(i))
    jobs = []
    job_start_times = {}
    job_states = {}

    # Submit Jobs through BigJob
    # Here you can add any arguments to each SubJob, change the output and error filenames, and so on
    # change this to your heart's content, but be careful

    for i in range(0, NUMBER_JOBS):
        jd = description()
        jd.executable = "/bin/echo"
        jd.number_of_processes = "4"
        jd.spmd_variation = "mpi" # for serial codes jd.spmd_variation="single"
        jd.arguments = ["$INFRASTRUCTURE"]
        jd.environment = ["INFRASTRUCTURE=FutureGrid"]
        jd.output = "sj-stdout-"+str(i)+".txt"
        jd.error = "sj-stderr-"+str(i)+".txt"
        sj = subjob()
        jobs.append(sj)
        sj.submit_job(bj.pilot_url, jd)
        job_start_times[sj]=time.time()
        job_states[sj] = sj.get_state()

    # busy wait for completion
    while 1:
        finish_counter=0
        result_map = {}
        for i in range(0, NUMBER_JOBS):
            old_state = job_states[jobs[i]]
            state = jobs[i].get_state()
 
            if state not in result_map:
                result_map[state] = 1
            else:
                result_map[state] = result_map[state] + 1
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue=None # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=4
    number_of_processes = 8
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)
    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "sge-ssh://lonestar.tacc.teragrid.org"
    
    """
        To use Globus Online the working directory must be specified using the following conventions
    """ 
    workingdirectory="go://"+GLOBUS_ONLINE_USER+":"+GLOBUS_ONLINE_PASSWORD+"@globusonline.org?ep=xsede#lonestar4&path=~/bigjob/"
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    
    bj_filetransfers = ["go://"+GLOBUS_ONLINE_USER+":"+GLOBUS_ONLINE_PASSWORD+"@globusonline.org?ep=drelu#MacBook&path=" + os.path.dirname(os.path.abspath(__file__)) 
                        + "/test.txt > BIGJOB_WORK_DIR"]
    
    
    bj.start_pilot_job( lrms_url,
                        None,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node,
                        bj_filetransfers)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/cat"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    jd.arguments = ["test.txt"]
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"
    jd.file_transfer = ["go://"+GLOBUS_ONLINE_USER+":"+GLOBUS_ONLINE_PASSWORD+"@globusonline.org?ep=drelu#MacBook&path=" + os.path.dirname(os.path.abspath(__file__)) 
                       + "/test.txt > SUBJOB_WORK_DIR"]
    
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
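A sketch of the go:// URL convention exactly as this example builds it, but with placeholder credentials; the user, password, and endpoint values are hypothetical:

GLOBUS_ONLINE_USER = "myuser"          # placeholder
GLOBUS_ONLINE_PASSWORD = "mypassword"  # placeholder
workingdirectory = ("go://" + GLOBUS_ONLINE_USER + ":" + GLOBUS_ONLINE_PASSWORD
                    + "@globusonline.org?ep=xsede#lonestar4&path=~/bigjob/")
print(workingdirectory)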
Example #18
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue = None  # if None default queue is used
    project = None  # if None default allocation is used
    walltime = 10
    processes_per_node = 4
    number_of_processes = 8
    workingdirectory = os.path.join(os.getcwd(),
                                    "agent")  # working directory for agent
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    #lrms_url = "fork://localhost" # resource url to run the jobs on localhost
    lrms_url = "condorg://brgw1.renci.org:2119/jobmanager-pbs"

    #lrms_url = "ssh://[email protected]"
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job(lrms_url, None, number_of_processes, queue, project,
                       workingdirectory, userproxy, walltime,
                       processes_per_node)

    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(
        bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()

    jd.executable = "/bin/date"
    jd.number_of_processes = "1"
    jd.spmd_variation = "single"
    #jd.arguments = ["match -f  bgr1.fa -A 0  -r reads_1.fastq -n 4 -T /tmp/ > bfast.matches.file.bgr.1.bmf"]
    jd.arguments = [""]
    #jd.working_directory = ""
    jd.output = "bfast-stdout.txt"
    jd.error = "bfast-stderr.txt"

    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)

    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if (state == "Failed" or state == "Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #19
def main():
    # Start BigJob

    ##########################################################################################
    # Edit parameters for BigJob
    queue="normal" # if None default queue is used
    project=None # if None default allocation is used 
    walltime=10
    processes_per_node=4
    number_of_processes = 8
    #workingdirectory=os.path.join(os.getcwd(), "agent")  # working directory for agent
    workingdirectory="agent"
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

    
    """ 
    URL of the SAGA Job Service that is used to dispatch the pilot job.
    The following URLs are accepted:
    
    lrms_url = "gram://oliver1.loni.org/jobmanager-pbs" # globus resource url used when globus is used. (LONI)
    lrms_url = "pbspro://louie1.loni.org" # pbspro resource url used when pbspro scheduling system is used.(Futuregrid or LSU Machines)
    lrms_url = "ssh://louie1.loni.org" # ssh resource url which launches jobs on target machine. Jobs not submitted to scheduling system.
    lrms_url = "pbs-ssh://louie1.loni.org" # Submit jobs to scheduling system of remote machine.
    lrms_url = "xt5torque://localhost" # torque resource url.
    
    Please ensure that the respective SAGA adaptor is installed and working
    """
    lrms_url = "fork://localhost" # resource url to run the jobs on localhost
    #lrms_url = "sge://localhost" # resource url to run the jobs on localhost
    #lrms_url = "ssh://localhost" # resource url to run the jobs on localhost
   
    ##########################################################################################

    print "Start Pilot Job/BigJob at: " + lrms_url
    bj = bigjob(COORDINATION_URL)
    bj.start_pilot_job( lrms_url,
                        number_of_processes,
                        queue,
                        project,
                        workingdirectory,
                        userproxy,
                        walltime,
                        processes_per_node)
    
    print "Pilot Job/BigJob URL: " + bj.pilot_url + " State: " + str(bj.get_state())

    ##########################################################################################
    # Submit SubJob through BigJob
    jd = description()
    jd.executable = "/bin/echo"
    #jd.executable = "$HOME/hello.sh"
    jd.number_of_processes = "1"
    jd.arguments = ["$HELLOWORLD"]
    jd.environment = ['HELLOWORLD=hello_world']
    #jd.spmd_variation = "mpi"
    
    # specify an optional working directory if the sub-job should be executed outside of the bigjob sandbox
    #jd.working_directory = "/tmp" 
    jd.output = "stdout.txt"
    jd.error = "stderr.txt"
    sj = subjob()
    sj.submit_job(bj.pilot_url, jd)
    
    #########################################
    # busy wait for completion
    while 1:
        state = str(sj.get_state())
        print "state: " + state
        if(state=="Failed" or state=="Done"):
            break
        time.sleep(2)

    ##########################################################################################
    # Cleanup - stop BigJob
    bj.cancel()
Example #20
 jds = []
 sjs = []
 for i in range(0, NUMBER_REPLICAS):
     stage_files(i)
     jd = saga.job.description()
     jd.executable = "namd2"
     jd.number_of_processes = "16"
     jd.spmd_variation = "mpi"
     # jd.arguments = ["NPT.conf"]
     jd.working_directory = WORK_DIR + "agent/" + str(i) + "/"
     #os.system("cp NPT.conf NPT.conf")
     jd.arguments = ["NPT.conf"]
     jd.output = str(i) + "/stdout-" + str(i) + ".txt"
     jd.error = str(i) + "/stderr-" + str(i) + ".txt"
     jds.append(jd)
     sj = bigjob.subjob(advert_host)
     sjs.append(sj)
     #prepare config and scp other files to remote machine
     NAMD_config(i)
     if i < RPB:
         j = 0
         copy_with_saga(i)
         sjs[i].submit_job(bjs[j].pilot_url, jds[i], str(i))
     elif (i >= RPB and i < (2 * RPB)):
         j = 1
         copy_with_saga(i)
         sjs[i].submit_job(bjs[j].pilot_url, jds[i], str(i))
     elif (i >= (2 * RPB) and i < (3 * RPB)):
         j = 2
         copy_with_saga(i)
         sjs[i].submit_job(bjs[j].pilot_url, jds[i], str(i))
Example #22
    for i in range(0, NUMBER_JOBS_NIMBUS):
        print "Start job no.: " + str(number_started_jobs + 1) + " on Nimbus"
        sj_nimbus = bigjob_cloud.subjob(bigjob=bj_nimbus)
        jd_nimbus.output = "stdout_nimbus.txt." + str(number_started_jobs + 1)
        jd_nimbus.error = "stderr_nimbus.txt." + str(number_started_jobs + 1)
        sj_nimbus.submit_job(jd_nimbus)
        jobs_nimbus.append(sj_nimbus)
        number_started_jobs = number_started_jobs + 1

    for i in range(0, NUMBER_JOBS_EC2):
        print "Start job no.: " +str(number_started_jobs + 1) + " on EC2"
        sj_ec2 = bigjob_cloud.subjob(bigjob=bj_ec2)
        jd_ec2.output = "stdout_ec2.txt."+str(number_started_jobs+1)
        jd_ec2.error = "stderr_ec2.txt."+str(number_started_jobs+1)
        sj_ec2.submit_job(jd_ec2)
        jobs_ec2.append(sj_ec2)
        number_started_jobs = number_started_jobs + 1

    for i in range(0, NUMBER_JOBS_GRID):
        print "Start job no.: " +str(number_started_jobs + 1) + " on Grid"
        sj_tg = bigjob.subjob(advert_host)
        jd.output = "stdout_tg.txt."+str(number_started_jobs+1)
        jd.error = "stderr_tg.txt."+str(number_started_jobs+1)
        sj_tg.submit_job(bj_tg.pilot_url, jd)
        jobs_tg.append(sj_tg)
        number_started_jobs = number_started_jobs + 1

    for i in range(0, NUMBER_JOBS_CONDOR):
        print "Start job no.: " +str(number_started_jobs + 1) + " on Condor"
        sj_condor = bigjob_condor.subjob(bigjob=bj_condor)
        jd_condor.output = "stdout_condor.txt."+str(number_started_jobs+1)
        jd_condor.error = "stderr_condor.txt."+str(number_started_jobs+1)
        sj_condor.submit_job(jd_condor)
        jobs_condor.append(sj_condor)
        number_started_jobs = number_started_jobs + 1
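A consolidation sketch (our own): the four near-identical loops above can be driven from one table of (count, label, submit) entries, keeping the shared job counter in a single place:

def run_batch(batches):
    # batches: list of (count, label, submit) tuples; submit is a callable
    # that performs the per-backend submission (stubbed below).
    started = 0
    for count, label, submit in batches:
        for _ in range(count):
            started = started + 1
            print("Start job no.: " + str(started) + " on " + label)
            submit(started)
    return started

# Runnable with stub submitters:
total = run_batch([(2, "Nimbus", lambda n: None), (2, "EC2", lambda n: None)])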