def start_glidin_jobs(RE_info):
    """Start glide-in jobs (advert_job.py) on every unique host in RE_info.

    For each distinct host in ``RE_info.remote_hosts`` the scheduler, project,
    queue, working directory and (optional) user proxy are read from the
    parallel per-host lists at the index of the host's first occurrence.
    The node count of a host is the number of times it is listed multiplied
    by ``RE_info.numberofprocesses``; it is split evenly across
    ``RE_info.number_glideins_per_host`` glide-in jobs.

    Every started job object is appended to
    ``RE_info.advert_glidin_jobs[host]``.  No job is started for a host when
    ``number_glideins_per_host`` is ``None`` or ``0``.
    """
    for host in set(RE_info.remote_hosts):
        host_count = RE_info.remote_hosts.count(host)
        # All per-host lists are addressed by the host's first occurrence.
        host_index = RE_info.remote_hosts.index(host)
        num_glidein = RE_info.number_glideins_per_host
        print("Number hosts: " + str(host_count) +
              " Number processes per job: " + str(RE_info.numberofprocesses) +
              " Number GlideIns per Host: " + str(num_glidein) +
              " Index: " + str(host_index))

        nodes = int(host_count) * int(RE_info.numberofprocesses)
        lrms = RE_info.remote_host_local_schedulers[host_index]
        project = RE_info.projects[host_index]
        queue = RE_info.queues[host_index]
        workingdirectory = RE_info.workingdirectories[host_index]
        try:
            userproxy = RE_info.userproxy[host_index]
        except (AttributeError, IndexError, KeyError, TypeError):
            # The user proxy is optional: fall back to None when RE_info has
            # no usable entry for this host.
            userproxy = None

        # The equality test (rather than plain truthiness) is kept on purpose
        # so that only True/1 selects the migol scheme, as before.
        scheme = "migol://" if CPR == True else "gram://"
        lrms_url = scheme + host + "/" + "jobmanager-" + lrms

        if num_glidein is None or num_glidein == 0:
            continue

        # Floor division keeps the integer result on Python 2 and 3 alike;
        # nodes must be divisible by num_glidein or the remainder is dropped.
        nodes_per_glidein = nodes // num_glidein
        print("Glidin URL: " + lrms_url)
        print("hosts: " + str(host) +
              " number of replica_processes (total): " + str(nodes))
        print("number glide-ins: " + str(num_glidein) +
              " node per glidein: " + str(nodes_per_glidein))
        print("Project: " + project + " Queue: " + queue +
              " Working Dir: " + workingdirectory)

        # start job
        for ng in range(num_glidein):
            advert_glidin_job = advert_job.advert_glidin_job(
                RE_info.advert_host)
            # Second argument (the agent path position) is None here.
            advert_glidin_job.start_glidin_job(lrms_url, None,
                                               nodes_per_glidein, queue,
                                               project, workingdirectory,
                                               userproxy, None)
            RE_info.advert_glidin_jobs.setdefault(host, []).append(
                advert_glidin_job)
            print("Started: " + str(advert_glidin_job) +
                  " Glide-In Job Number: " + str(ng))
 def init_bigjobs(self):
     """Launch a bigjob on each resource chosen by ``schedule_bigjobs``.

     The scheduling result is kept in ``self.bigjob_list``.  Every entry is
     extended in place with the started job object (``"bigjob"``), the
     number of free cores (``"free_cores"``, initially all cores of the
     entry) and a lock (``"lock"``).
     """
     self.bigjob_list = self.schedule_bigjobs()
     for entry in self.bigjob_list:
         target_url = entry["gram_url"]
         logging.debug("start bigjob at: " + target_url)

         launcher = advert_job.advert_glidin_job(self.advert_host)
         launcher.start_glidin_job(
             target_url,
             entry["re_agent"],
             entry["number_cores"],
             entry["queue"],
             entry["allocation"],
             "$(HOME)",
             None,
             None,
         )

         # Keep the job handle on the entry so it can be looked up later.
         entry["bigjob"] = launcher
         entry["free_cores"] = int(entry["number_cores"])
         # Lock guarding modifications of the number of free nodes.
         entry["lock"] = threading.Lock()
    def start_glidin_jobs(self):
        """Start glide-in jobs (advert_job.py) for every resource in self.resourceMap.

        Each value of ``self.resourceMap`` is a dict that must provide the
        keys ``host``, ``number_glide_in``, ``number_nodes``, ``scheduler``,
        ``allocation``, ``queue`` and ``working_dir_root``; ``userproxy`` is
        optional.  The nodes of a resource are split evenly across its
        glide-ins, and every started job object is appended to the
        resource's ``"glide_in_jobs"`` list (created on demand).

        Nothing is started for a resource whose ``number_glide_in`` is 0.

        Raises:
            KeyError: if a required key is missing from a resource dict.
        """
        for settings in self.resourceMap.values():
            host = settings["host"]
            num_glidein = int(settings["number_glide_in"])
            nodes = int(settings["number_nodes"])
            lrms = settings["scheduler"]
            project = settings["allocation"]
            queue = settings["queue"]
            workingdirectory = settings["working_dir_root"]
            # The user proxy is optional.
            userproxy = settings.get("userproxy")

            # The equality test (rather than plain truthiness) is kept on
            # purpose so that only True/1 selects the migol scheme, as before.
            scheme = "migol://" if self.cpr == True else "gram://"
            lrms_url = scheme + host + "/" + "jobmanager-" + lrms

            if num_glidein == 0:
                continue

            # Floor division keeps the integer result on Python 2 and 3
            # alike; nodes must be divisible by num_glidein or the remainder
            # is dropped.
            nodes_per_glidein = nodes // num_glidein
            print("Glidin URL: " + lrms_url)
            # Report the host itself, not the whole resource dict.
            print("hosts: " + str(host) +
                  " number of replica_processes (total): " + str(nodes))
            print("number glide-ins: " + str(num_glidein) +
                  " node per glidein: " + str(nodes_per_glidein))
            print("Project: " + project + " Queue: " + queue +
                  " Working Dir: " + workingdirectory)

            # start job
            for ng in range(num_glidein):
                advert_glidin_job = advert_job.advert_glidin_job(
                    self.advert_host)
                advert_glidin_job.start_glidin_job(lrms_url, self.re_agent,
                                                   nodes_per_glidein,
                                                   queue, project,
                                                   workingdirectory,
                                                   userproxy, None)
                settings.setdefault("glide_in_jobs", []).append(
                    advert_glidin_job)
                print("Started: " + str(advert_glidin_job) +
                      " Glide-In Job Number: " + str(ng))
 def init_bigjobs(self):
     """Start one bigjob per scheduled resource and record its bookkeeping.

     ``schedule_bigjobs`` supplies the list of resource dicts, which is
     stored as ``self.bigjob_list``.  Each dict receives three additional
     keys: ``"bigjob"`` (the started job object), ``"free_cores"`` (set to
     the integer value of ``"number_cores"``) and ``"lock"``.
     """
     self.bigjob_list = self.schedule_bigjobs()
     for resource in self.bigjob_list:
         url = resource["gram_url"]
         logging.debug("start bigjob at: " + url)

         job = advert_job.advert_glidin_job(self.advert_host)
         job.start_glidin_job(url, resource["re_agent"],
                              resource["number_cores"], resource["queue"],
                              resource["allocation"], "$(HOME)", None, None)

         # Store the bigjob in the dict for later reference.
         resource["bigjob"] = job
         resource["free_cores"] = int(resource["number_cores"])
         # Lock for modifying the number of free nodes.
         resource["lock"] = threading.Lock()
Example #5
0
def start_glidin_jobs(RE_info):
    """Start glide-in jobs (advert_job.py) on every unique host in RE_info.

    For each distinct host in ``RE_info.remote_hosts`` the scheduler, project,
    queue, working directory and (optional) user proxy are read from the
    parallel per-host lists at the index of the host's first occurrence.
    The node count of a host is the number of times it is listed multiplied
    by ``RE_info.numberofprocesses``; it is split evenly across
    ``RE_info.number_glideins_per_host`` glide-in jobs.

    Every started job object is appended to
    ``RE_info.advert_glidin_jobs[host]`` and reported together with the
    local time of the launch.  No job is started for a host when
    ``number_glideins_per_host`` is ``None`` or ``0``.
    """
    for host in set(RE_info.remote_hosts):
        host_count = RE_info.remote_hosts.count(host)
        # All per-host lists are addressed by the host's first occurrence.
        host_index = RE_info.remote_hosts.index(host)
        num_glidein = RE_info.number_glideins_per_host
        print("Number hosts: " + str(host_count) +
              " Number processes per job: " + str(RE_info.numberofprocesses) +
              " Number GlideIns per Host: " + str(num_glidein) +
              " Index: " + str(host_index))

        nodes = int(host_count) * int(RE_info.numberofprocesses)
        lrms = RE_info.remote_host_local_schedulers[host_index]
        project = RE_info.projects[host_index]
        queue = RE_info.queues[host_index]
        workingdirectory = RE_info.workingdirectories[host_index]
        try:
            userproxy = RE_info.userproxy[host_index]
        except (AttributeError, IndexError, KeyError, TypeError):
            # The user proxy is optional: fall back to None when RE_info has
            # no usable entry for this host.
            userproxy = None

        # The equality test (rather than plain truthiness) is kept on purpose
        # so that only True/1 selects the migol scheme, as before.
        scheme = "migol://" if CPR == True else "gram://"
        lrms_url = scheme + host + "/" + "jobmanager-" + lrms

        if num_glidein is None or num_glidein == 0:
            continue

        # Floor division keeps the integer result on Python 2 and 3 alike;
        # nodes must be divisible by num_glidein or the remainder is dropped.
        nodes_per_glidein = nodes // num_glidein
        print("Glidin URL: " + lrms_url)
        print("hosts: " + str(host) +
              " number of replica_processes (total): " + str(nodes))
        print("number glide-ins: " + str(num_glidein) +
              " node per glidein: " + str(nodes_per_glidein))
        print("Project: " + project + " Queue: " + queue +
              " Working Dir: " + workingdirectory)

        # start job
        for ng in range(num_glidein):
            advert_glidin_job = advert_job.advert_glidin_job(
                RE_info.advert_host)
            # Second argument (the agent path position) is None here.
            advert_glidin_job.start_glidin_job(lrms_url,
                                               None,
                                               nodes_per_glidein,
                                               queue,
                                               project,
                                               workingdirectory,
                                               userproxy,
                                               None)
            RE_info.advert_glidin_jobs.setdefault(host, []).append(
                advert_glidin_job)
            # A space separates the job number from the timestamp, which
            # were previously fused together in the output.
            print("Started: " + str(advert_glidin_job) +
                  " Glide-In Job Number: " + str(ng) +
                  " " + time.asctime(time.localtime(time.time())))
    def start_glidin_jobs(self):
        """Start glide-in jobs (advert_job.py) for every resource in self.resourceMap.

        Each value of ``self.resourceMap`` is a dict that must provide the
        keys ``host``, ``number_glide_in``, ``number_nodes``, ``scheduler``,
        ``allocation``, ``queue`` and ``working_dir_root``; ``userproxy`` is
        optional.  The nodes of a resource are split evenly across its
        glide-ins, and every started job object is appended to the
        resource's ``"glide_in_jobs"`` list (created on demand).

        Nothing is started for a resource whose ``number_glide_in`` is 0.

        Raises:
            KeyError: if a required key is missing from a resource dict.
        """
        for settings in self.resourceMap.values():
            host = settings["host"]
            num_glidein = int(settings["number_glide_in"])
            nodes = int(settings["number_nodes"])
            lrms = settings["scheduler"]
            project = settings["allocation"]
            queue = settings["queue"]
            workingdirectory = settings["working_dir_root"]
            # The user proxy is optional.
            userproxy = settings.get("userproxy")

            # The equality test (rather than plain truthiness) is kept on
            # purpose so that only True/1 selects the migol scheme, as before.
            scheme = "migol://" if self.cpr == True else "gram://"
            lrms_url = scheme + host + "/" + "jobmanager-" + lrms

            if num_glidein == 0:
                continue

            # Floor division keeps the integer result on Python 2 and 3
            # alike; nodes must be divisible by num_glidein or the remainder
            # is dropped.
            nodes_per_glidein = nodes // num_glidein
            print("Glidin URL: " + lrms_url)
            # Report the host itself, not the whole resource dict.
            print("hosts: " + str(host) + " number of replica_processes (total): " + str(nodes))
            print("number glide-ins: " + str(num_glidein) + " node per glidein: " + str(nodes_per_glidein))
            print("Project: " + project + " Queue: " + queue + " Working Dir: " + workingdirectory)

            # start job
            for ng in range(num_glidein):
                advert_glidin_job = advert_job.advert_glidin_job(self.advert_host)
                advert_glidin_job.start_glidin_job(
                    lrms_url, self.re_agent, nodes_per_glidein, queue, project, workingdirectory, userproxy, None
                )
                settings.setdefault("glide_in_jobs", []).append(advert_glidin_job)
                print("Started: " + str(advert_glidin_job) + " Glide-In Job Number: " + str(ng))
""" Test Job Submission via Advert """
if __name__ == "__main__":
    # Manual test: launch one glide-in (BigJob) through advert_job and then
    # build a SAGA job description for a sub-job.

    # Parameters for the BigJob
    re_agent = os.path.join(os.getcwd(), "advert_launcher.sh")  # path to agent
    nodes = 64  # number of nodes for the agent
    lrms_url = "gram://qb1.loni.org/jobmanager-pbs"  # resource url
    project = "loni_jha_big"  # allocation
    queue = "workq"  # queue (PBS)
    workingdirectory = "/tmp"  # working directory
    userproxy = None  # userproxy (not supported yet due to context issue w/ SAGA)

    # start Glide-In job (Replica-Agent)
    print("Start Glide-In at: " + lrms_url)
    advert_glidin_job = advert_job.advert_glidin_job(advert_host)
    # NOTE(review): the other call sites in this file pass an eighth
    # positional argument (None) to start_glidin_job -- confirm whether the
    # signature requires it here as well.
    advert_glidin_job.start_glidin_job(lrms_url,
                                       re_agent,
                                       nodes,
                                       queue,
                                       project,
                                       workingdirectory,
                                       userproxy)
    print("BigJob URL: " + advert_glidin_job.glidin_url)

    # submit sub-job through big-job
    jd = saga.job.description()
    jd.executable = "/home/luckow/src/REMDgManager/bigjob/main"
    jd.number_of_processes = "2"
    jd.spmd_variation = "mpi"
    jd.arguments = [""]
""" Test Job Submission via Advert """
if __name__ == "__main__":
    # Manual test: launch one glide-in (BigJob) through advert_job and then
    # build a SAGA job description for a sub-job.

    # Parameter for BigJob
    re_agent = os.getcwd() + "/advert_launcher.sh" # path to agent
    nodes = 64 # number nodes for agent
    lrms_url = "gram://qb1.loni.org/jobmanager-pbs" # resource url
    project = "loni_jha_big" #allocation
    queue = "workq" # queue (PBS)
    workingdirectory="/tmp"  # working directory
    userproxy = None # userproxy (not supported yet due to context issue w/ SAGA)

    # start Glide-In job (Replica-Agent)
    print "Start Glide-In at: " + lrms_url
    # advert_host is not defined in this block; it must come from module scope.
    advert_glidin_job = advert_job.advert_glidin_job(advert_host)
    # The final positional argument is None; its meaning is not visible here.
    advert_glidin_job.start_glidin_job(lrms_url,
                                        re_agent,
                                        nodes,
                                        queue,
                                        project,
                                        workingdirectory, 
                                        userproxy,
                                        None)
    print "BigJob URL: " + advert_glidin_job.glidin_url

    # submit sub-job through big-job
    # Describe the sub-job: the executable to run, started as 2 MPI processes.
    jd = saga.job.description()
    jd.executable = "/home/luckow/src/REMDgManager/bigjob/main"
    jd.number_of_processes = "2"
    jd.spmd_variation = "mpi"