def test_container(self):
    """Add a freshly created job to a job container and remove it again.

    The job is only created (never run); the test passes when neither
    container.add() nor container.remove() raises.
    """
    ctx = saga.Context()
    ctx.type = saga.Context.SSH
    ctx.userid = 'oweidner'  # like 'ssh username@host ...'
    ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

    # job service for the 'india' PBS cluster, with the SSH context attached
    js = saga.job.Service("pbs+ssh://india.futuregrid.org")
    js.session.contexts.append(ctx)

    # describe our job
    jd = saga.job.Description()

    # resource requirements
    # (was 'walltime_limit'; the attribute is spelled 'wall_time_limit'
    # everywhere else in this file)
    jd.wall_time_limit = "0:05:00"
    jd.total_cpu_count = 1

    # environment, executable & arguments
    jd.environment = {'SLEEP_TIME': '10'}
    jd.executable = '/bin/sleep'
    jd.arguments = ['$SLEEP_TIME']

    # output options
    jd.output = "bliss_pbssh_job.stdout"
    jd.error = "bliss_pbssh_job.stderr"

    # create a new job container bound to the service
    container = saga.job.Container(js)

    # create the job (state: New), then add it and take it out again
    myjob = js.create_job(jd)
    container.add(myjob)
    container.remove(myjob)
def main():
    """Run a short '/bin/sleep' job through a 'pbs+gsissh' job service.

    An X509 proxy context is attached to the service's session. The job id
    and state are printed before and after submission, and the state and
    exit code after completion. On saga.Exception a message is printed and
    the process exits with status -1.
    """

    def show_job(job):
        # id + state, printed both before and after submission
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        # security context: an X509 proxy certificate
        proxy_ctx = saga.Context()
        proxy_ctx.type = saga.Context.X509
        proxy_ctx.userproxy = '/tmp/x509up_u501_ncsa'

        # job service endpoint; the context goes onto its session
        service = saga.job.Service(
            "pbs+gsissh://gsissh.kraken.nics.xsede.org")
        service.session.contexts.append(proxy_ctx)

        # job description: allocation, queue and resource requirements
        desc = saga.job.Description()
        desc.project = "TG-MCB090174"
        desc.queue = "small"
        desc.wall_time_limit = 5  # minutes

        # environment, executable & arguments
        desc.environment = {'SLEEP_TIME': '10'}
        desc.executable = '/bin/sleep'
        desc.arguments = ['$SLEEP_TIME']

        # output options
        desc.output = "bliss_sgessh_job.stdout"
        desc.error = "bliss_sgessh_job.stderr"

        # create the job (state: New)
        job = service.create_job(desc)
        show_job(job)

        print("\n...starting job...\n")
        job.run()  # submit
        show_job(job)

        print("\n...waiting for job...\n")
        job.wait()  # returns once the job has finished or failed
        print("Job State : %s" % (job.get_state()))
        print("Exitcode : %s" % (job.exitcode))

    except saga.Exception as err:
        print("An error occured during job execution: %s" % (str(err)))
        sys.exit(-1)
def main():
    """Run a 'bfast match' job on the 'india' PBS cluster via pbs+ssh.

    Output file names carry the current time stamp. Job id/state are
    printed around submission, state and exit code after completion.
    A saga.Exception is reported on stdout; the function then returns.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        # SSH security context: only the remote user name is set
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'

        # job service with the context attached to its session
        service = saga.job.Service("pbs+ssh://india.futuregrid.org")
        service.session.contexts.append(ssh_ctx)

        desc = saga.job.Description()

        # resource requirements
        desc.wall_time_limit = "0:05:00"
        desc.total_cpu_count = 1

        # environment, working directory, executable & arguments
        desc.environment = {'BFAST_DIR': '/N/u/oweidner/bfast'}
        desc.working_directory = '/N/u/oweidner/bfast/tmp/'
        desc.executable = '$BFAST_DIR/bin/bfast'
        desc.arguments = [
            'match',
            '-A 1',
            '-r $BFAST_DIR/data/small/reads_5K/reads.10.fastq',
            '-f $BFAST_DIR/data/small/reference/hg_2122.fa',
        ]

        # output options: one time stamp shared by both file names
        stamp = time.time()
        desc.output = "bfast_match_%s.stdout" % stamp
        desc.error = "bfast_match_%s.stderr" % stamp

        # create the job (state: New)
        job = service.create_job(desc)
        show_job(job)

        print("\n...starting job...\n")
        job.run()
        show_job(job)

        print("\n...waiting for job...\n")
        job.wait()
        print("Job State : %s" % (job.get_state()))
        print("Exitcode : %s" % (job.exitcode))

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def run(url, outprefix, username, queue, project):
    """Test if we can execute a remote bash script via 'bash -c'

    Submits '/bin/echo $MYOUTPUT' to the job service at *url* as
    *username*, using *queue* and *project*, with stdout/stderr written
    below *outprefix*. The outcome is recorded in the locals 'failed'
    and 'why'.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = username  # like 'ssh username@host ...'

        service = saga.job.Service(url)
        service.session.contexts.append(ssh_ctx)

        # describe our job
        desc = saga.job.Description()
        desc.queue = queue
        desc.project = project
        desc.wall_time_limit = 5  # minutes

        # environment, executable & arguments
        desc.working_directory = "/tmp"
        desc.environment = {'MYOUTPUT': '"Hello from Bliss"'}
        desc.executable = '/bin/echo'
        desc.arguments = ['$MYOUTPUT']

        # output options
        desc.output = "%s/bliss_job.08.stdout" % (outprefix)
        desc.error = "%s/bliss_job.08.stderr" % (outprefix)

        # create the job (state: New)
        job = service.create_job(desc)
        show_job(job)

        print("\n...starting job...\n")
        job.run()
        show_job(job)

        print("\n...waiting for job...\n")
        job.wait()
        print("Job State : %s" % (job.get_state()))
        print("Exitcode : %s" % (job.exitcode))

        # anything other than 'Done' counts as a failure
        failed = job.get_state() != saga.job.Job.Done
        why = "Job returned in state 'Failed'." if failed else ""

    except saga.Exception as err:
        failed = True
        why = str(err)
def test_context_type_EC2(self):
    """An EC2 context must refuse a 'userkey' path that does not exist."""
    ec2_ctx = saga.Context()
    ec2_ctx.type = saga.Context.EC2
    # presumably this file is provided by the test fixture -- confirm
    ec2_ctx.userkey = "/tmp/bliss-test.file1"

    try:
        ec2_ctx.userkey = ("non_existing_file_2345435")
    except saga.Exception:
        # rejected, as expected
        return

    self.fail("'userkey' shouldn't accept a non-exsisting file'")
def main():
    """Run '/bin/sleep 10' through a BigJob server using the SAGA job API.

    A BigJob context (user id and password) is put on a dedicated
    session that is passed to the job service. Job id/state are printed
    around submission, state and exit code after completion. A
    saga.Exception is reported on stdout.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        # log-in credentials for the bigjob service (only needed when
        # the service has security enabled)
        bj_ctx = saga.Context()
        bj_ctx.type = saga.Context.BigJob
        bj_ctx.userid = 'oweidner'
        bj_ctx.userpass = '******'

        bj_session = saga.Session()
        bj_session.contexts.append(bj_ctx)

        # job service connected to a running bigjob server
        service = saga.job.Service(
            "bigjob://engage-submit3.renci.org:28082/engage.fork.test",
            session=bj_session)

        desc = saga.job.Description()

        # resource requirements
        desc.wall_time_limit = "0:05:00"
        desc.number_of_processes = 1

        # executable & arguments
        desc.executable = '/bin/sleep'
        desc.arguments = ['10']

        # output options
        desc.output = "bigjob_via_saga_api.stdout"
        desc.error = "bigjob_via_saga_api.stderr"

        # create the job (state: New)
        job = service.create_job(desc)
        show_job(job)

        print("\n...starting job...\n")
        job.run()
        show_job(job)

        print("\n...waiting for job...\n")
        job.wait()
        print("Job State : %s" % (job.get_state()))
        print("Exitcode : %s" % (job.exitcode))

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def main():
    """Print key metrics for the services discovered on a set of clusters.

    For each machine a service discoverer is created with the SSH
    context attached, and for each listed service the running/waiting
    job counts, memory per node and CPU figures are printed. A
    saga.Exception is reported on stdout.
    """
    try:
        # resources that are potentially available
        machines = {
            'xray': 'pbs+ssh://xray.futuregrid.org',
            'india': 'pbs+ssh://india.futuregrid.org',
            'alamo': 'pbs+ssh://alamo.futuregrid.org',
            'louie': 'pbs+ssh://louie.loni.org',
            'queenbee': 'pbs+ssh://queenbee.loni.org',
        }

        # SSH security context shared by all discoverers
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

        for name, endpoint in machines.items():
            print("\nResource: %s" % (name))

            # discoverer for this endpoint, then the list of services
            discoverer = saga.sd.Discoverer(endpoint)
            discoverer.session.contexts.append(ssh_ctx)

            for service in discoverer.list_services():
                # key metrics come from the service data object
                data = service.get_data()
                print(" * Serivce: '%s', type: '%s', url: '%s'"
                      % (service.name, service.type, service.url))
                print(" |- Running Jobs : %s"
                      % (data.get_attribute("GlueCEStateRunningJobs")))
                print(" |- Waiting Jobs : %s"
                      % (data.get_attribute("GlueCEStateWaitingJobs")))
                ram_size = float(
                    data.get_attribute("GlueHostMainMemoryRAMSize"))
                print(" |- Memory per Node : %.2f GB"
                      % (ram_size / 1048576.0))
                print(" |- Total CPUs : %s"
                      % (data.get_attribute("GlueSubClusterPhysicalCPUs")))
                print(" |- Free CPUs : %s"
                      % (data.get_attribute("GlueCEStateFreeCPUs")))
                print(" '- CPUs per Node : %s"
                      % (data.get_attribute("GlueHostArchitectureSMPSize")))

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def main():
    """Run '/bin/echo' via ssh on localhost and stage its stdout to /tmp/.

    The SSH context lives on a dedicated session. That session is used
    for both the job service and the file transfer, so the same identity
    runs the job and fetches its output. Job id/state are printed around
    submission, state and exit code after completion. On saga.Exception
    a message is printed and the process exits with status -1.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = 'oweidner'  # your identity on the remote machine
        # ctx.userkey = '/Users/oweidner/.ssh/rsa_work'

        ses = saga.Session()
        ses.contexts.append(ctx)

        # create the job service on the session that carries the context;
        # previously the session was built but never handed to the service
        js = saga.job.Service("ssh://localhost", session=ses)

        # describe our job
        jd = saga.job.Description()
        jd.environment = {'MYOUTPUT': '"Hello from SAGA"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']
        jd.output = "myjob.stdout"
        jd.error = "myjob.stderr"

        # create the job (state: New)
        myjob = js.create_job(jd)
        show_job(myjob)

        print("\n...starting job...\n")
        myjob.run()
        show_job(myjob)

        print("\n...waiting for job...\n")
        # wait for the job to either finish or fail
        myjob.wait()
        print("Job State : %s" % (myjob.get_state()))
        print("Exitcode : %s" % (myjob.exitcode))

        # stage the job's stdout to the local /tmp/ directory
        outfilesource = 'sftp://localhost/Users/oweidner/myjob.stdout'
        outfiletarget = 'file://localhost/tmp/'
        out = saga.filesystem.File(outfilesource, session=ses)
        out.copy(outfiletarget)

        print("Staged out %s to %s (size: %s bytes)" % (
            outfilesource, outfiletarget, out.get_size()))

    except saga.Exception as ex:
        print("An error occured during job execution: %s" % (str(ex)))
        sys.exit(-1)
def main():
    """Run a short '/bin/sleep' job on the 'india' PBS cluster via pbs+ssh.

    An SSH context (user id and certificate) is attached to the job
    service's session. Job id/state are printed around submission, state
    and exit code after completion. A saga.Exception is reported on
    stdout.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        # SSH security context
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

        # job service with the context attached to its session
        service = saga.job.Service("pbs+ssh://india.futuregrid.org")
        service.session.contexts.append(ssh_ctx)

        desc = saga.job.Description()

        # resource requirements
        desc.wall_time_limit = "0:05:00"
        desc.total_cpu_count = 1

        # environment, executable & arguments
        desc.environment = {'SLEEP_TIME': '10'}
        desc.executable = '/bin/sleep'
        desc.arguments = ['$SLEEP_TIME']

        # output options
        desc.output = "bliss_pbssh_job.stdout"
        desc.error = "bliss_pbssh_job.stderr"

        # create the job (state: New)
        job = service.create_job(desc)
        show_job(job)

        print("\n...starting job...\n")
        job.run()
        show_job(job)

        print("\n...waiting for job...\n")
        job.wait()
        print("Job State : %s" % (job.get_state()))
        print("Exitcode : %s" % (job.exitcode))

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def main():
    """Exercise the job service of the local 'fork' adaptor.

    Lists the attributes of a job description, starts ten jobs, then
    walks the service's job list four times (look-up, cancel, run, wait)
    printing id and state of every job. A saga.Exception is printed.
    """

    def show(job):
        print(job.get_job_id() + " : " + job.get_state())

    try:
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.usercert = "/Users/s1063117/id_rsa.pub"
        ssh_ctx.userkey = "/Users/s1063117/id_rsa"

        service = saga.job.Service("fork://localhost")
        # the context is registered through both session interfaces
        service.get_session().add_context(ssh_ctx)
        service.get_session().contexts.append(ssh_ctx)

        desc = saga.job.Description()
        desc.set_attribute('Executable', '/bin/sleep')
        desc.set_vector_attribute('Arguments', ["10"])

        # constructed but not used any further
        unused_url = saga.Url("fork://localhost:8080")

        # overrides the 'Arguments' set above
        desc.arguments = ['4']

        for name in desc.list_attributes():
            print("%s %s %s %s" % (name,
                                   desc.attribute_is_vector(name),
                                   desc.attribute_is_readonly(name),
                                   desc.attribute_is_writeable(name)))

        for _ in range(10):
            started = service.create_job(desc)
            started.run()
            show(started)

        # NOTE(review): the collapsed source does not show whether this
        # sleep sits inside or after the loop above -- confirm
        time.sleep(2)

        # re-fetch every job through its id
        for listed in service.list():
            show(service.get_job(listed.get_job_id()))

        for listed in service.list():
            listed.cancel()
            show(listed)

        for listed in service.list():
            listed.run()
            show(listed)

        for listed in service.list():
            listed.wait()
            show(listed)

    except saga.Exception as err:
        print(str(err))
def test_session(self):
    """Check default-session sharing and explicit sessions on job services.

    Services created without a session must share the default session;
    a service given its own session must not; contexts can be added to
    and removed from a session; two distinct sessions must not compare
    equal.
    """

    def default_session():
        # looked up on every use, exactly where the comparison happens
        return saga.Object._Object__shared_state["default_session"]

    first = saga.job.Service("fork://localhost")
    second = saga.job.Service("fork://localhost")

    if first.get_session() != second.get_session():
        self.fail("Both objects should return the same (default) session")

    for service in (first, second):
        if service.get_session() != default_session():
            self.fail("Object should return default session")

    # a service created later still gets the default session
    later = saga.job.Service("fork://localhost")
    if later.get_session() != default_session():
        self.fail("Object should return default session")

    # an explicit session replaces the default one
    custom = saga.job.Service("fork://localhost", session=saga.Session())
    if custom.get_session() == default_session():
        self.fail("Object should not return default session")

    # add / remove a context
    context = saga.Context()
    session_a = saga.Session()

    session_a.add_context(context)
    if len(session_a.list_contexts()) != 1:
        self.fail("Context list length should be 1")

    session_a.remove_context(context)
    if len(session_a.list_contexts()) != 0:
        self.fail("Context list length should be 0")

    # two services on two different sessions
    session_b = saga.Session()
    service_a = saga.job.Service("fork://localhost", session=session_a)
    service_b = saga.job.Service("fork://localhost", session=session_b)

    if service_a.get_session() == service_b.get_session():
        self.fail("Sessions shouldn't be identical")
def main():
    """Run 16 identical '/bin/sleep' jobs as one job container on 'alamo'.

    All jobs are created from one description, started together through
    the container and waited for with WaitMode.All; afterwards id and
    state of every job are printed. A saga.Exception is reported on
    stdout.
    """
    try:
        # SSH security context: only the remote user name is set
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'

        # job service with the context attached to its session
        service = saga.job.Service("pbs+ssh://alamo.futuregrid.org")
        service.session.contexts.append(ssh_ctx)

        desc = saga.job.Description()

        # resource requirements
        desc.wall_time_limit = "0:05:00"
        desc.total_cpu_count = 1

        # environment, executable & arguments
        desc.environment = {'SLEEP_TIME': '10'}
        desc.executable = '/bin/sleep'
        desc.arguments = ['$SLEEP_TIME']

        # fill a new container with 16 jobs
        bag = saga.job.Container(service)
        for _ in range(16):
            bag.add(service.create_job(desc))

        print("\n...starting jobs...\n")
        bag.run()

        print("\n...waiting for jobs...\n")
        bag.wait(saga.job.WaitMode.All)

        for member in bag.list():
            print("Job ID %s (State: %s)"
                  % (member.jobid, member.get_state()))

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def main():
    """Run '/bin/echo' on the 'india' PBS cluster via pbs+ssh.

    The SSH context lives on a dedicated session which is passed to the
    job service. Job id/state are printed around submission, state and
    exit code after completion. On saga.Exception a message is printed
    and the process exits with status -1.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = 'oweidner'  # your identity on the remote machine
        # ctx.userkey = '/Users/oweidner/.ssh/rsa_work'

        ses = saga.Session()
        ses.contexts.append(ctx)

        # create the job service on the session that carries the context;
        # previously 'ses' was built but never used, so the configured
        # identity did not reach the service
        js = saga.job.Service("pbs+ssh://india.futuregrid.org", session=ses)

        # describe our job
        jd = saga.job.Description()
        jd.environment = {'MYOUTPUT': '"Hello from SAGA"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']
        jd.output = "my1stjob.stdout"
        jd.error = "my1stjob.stderr"

        # create the job (state: New)
        myjob = js.create_job(jd)
        show_job(myjob)

        print("\n...starting job...\n")
        myjob.run()
        show_job(myjob)

        print("\n...waiting for job...\n")
        # wait for the job to either finish or fail
        myjob.wait()
        print("Job State : %s" % (myjob.get_state()))
        print("Exitcode : %s" % (myjob.exitcode))

    except saga.Exception as ex:
        print("An error occured during job execution: %s" % (str(ex)))
        sys.exit(-1)
def run(url, username):
    """Test if we can lists a (remote) directory

    Opens the directory at *url* with an SSH context for *username* and
    prints every entry. The outcome is recorded in the locals 'failed'
    and 'why'.
    """
    try:
        failed = False

        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = username  # like 'ssh username@host ...'

        ssh_session = saga.Session()
        ssh_session.contexts.append(ssh_ctx)

        remote_dir = saga.filesystem.Directory(url, session=ssh_session)
        for item in remote_dir.list():
            print(item)

    except saga.Exception as err:
        failed = True
        why = str(err)
def main():
    """Ad-hoc checks of session and context handling with 'fork' services.

    Prints the sessions of several objects, asserts that adding and
    removing a context changes a session's context list, and asserts
    that a context added through one service's session can be read back.
    A saga.Exception is printed.
    """
    try:
        service = saga.job.Service("fork://localhost")
        desc = saga.job.Description()

        print(repr(service.get_session()))
        print(repr(desc.get_session()))

        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.usercert = "/Users/s1063117/id_rsa.pub"
        ssh_ctx.userkey = "/Users/s1063117/id_rsa"

        # add / remove on an explicit session
        session_a = saga.Session()
        session_a.add_context(ssh_ctx)
        assert (len(session_a.list_contexts()) == 1)
        session_a.remove_context(ssh_ctx)
        assert (len(session_a.list_contexts()) == 0)

        session_b = saga.Session()

        # one service per explicit session
        service = saga.job.Service("fork://localhost", session=session_a)
        desc = saga.job.Description()
        print(repr(service.get_session()))

        service = saga.job.Service("fork://localhost", session=session_b)
        desc = saga.job.Description()
        print(repr(service.get_session()))

        # the same context is added twice
        session_a.add_context(ssh_ctx)
        session_a.add_context(ssh_ctx)

        # back to a service without an explicit session
        service = saga.job.Service("fork://localhost")
        service_session = service.get_session()
        service_session.add_context(ssh_ctx)

        other_service = saga.job.Service("fork://localhost")
        # note: read back through 'service', not 'other_service'
        first_ctx = service.get_session().list_contexts()[0]

        assert (first_ctx.userkey == "/Users/s1063117/id_rsa")
        print(first_ctx.type)

    except saga.Exception as err:
        print(str(err))
def main():
    """List the entries of '/tmp' on india.futuregrid.org via sftp.

    An SSH context (user id and certificate) is put on a dedicated
    session that is used to open the directory. A saga.Exception is
    reported on stdout.
    """
    try:
        # SSH security context
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/s1063117/.ssh/id_rsa'  # like 'ssh -i ...'

        ssh_session = saga.Session()
        ssh_session.contexts.append(ssh_ctx)

        remote_dir = saga.filesystem.Directory(
            "sftp://india.futuregrid.org/tmp", session=ssh_session)

        for item in remote_dir.list():
            print(item)

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def main():
    """Copy '.bash_history' from a remote home directory to '/tmp/'.

    The remote directory on queenbee.loni.org is opened via sftp with an
    SSH context (user id and certificate) on a dedicated session. A
    saga.Exception is reported on stdout.
    """
    try:
        # SSH security context
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/s1063117/.ssh/id_rsa'  # like 'ssh -i ...'

        ssh_session = saga.Session()
        ssh_session.contexts.append(ssh_ctx)

        # home directory on the remote machine
        home_dir = saga.filesystem.Directory(
            "sftp://queenbee.loni.org/home/oweidner", session=ssh_session)

        # target is /tmp/ on localhost
        home_dir.copy('.bash_history', 'sftp://localhost/tmp/')

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def run(self):
    """Start a GCE instance and submit the pilot job to it via SSH.

    Inserts a compute instance built from this object's location,
    machine type, id and image URL, waits until it is running, reads its
    external NAT IP and then submits self.job_description through an
    'ssh://' job service. Submission is retried up to TRIAL_MAX times
    with a 10 second pause after each failed attempt.

    Raises:
        Exception: if every submission attempt failed.
    """
    request_dict = {
        "kind": "compute#instance",
        "disks": [{
            "kind": "compute#instanceDisk",
            "type": "PERSISTENT",
            "mode": "READ",
            "deviceName": "reference-genome",
            "source": "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/disks/reference-genome"
        }],
        "networkInterfaces": [{
            "kind": "compute#instanceNetworkInterface",
            "accessConfigs": [{
                "name": "External NAT",
                "type": "ONE_TO_ONE_NAT"
            }],
            "network": "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/networks/default"
        }],
        "serviceAccounts": [{
            "kind": "compute#serviceAccount",
            "email": "default",
            "scopes": [
                "https://www.googleapis.com/auth/userinfo.email",
                "https://www.googleapis.com/auth/compute",
                "https://www.googleapis.com/auth/devstorage.full_control"
            ]
        }],
        # e.g. ".../projects/bigjob-pilot/zones/us-east1-a"
        "zone": self.location,
        # e.g. ".../projects/bigjob-pilot/machine-types/n1-standard-1"
        "machineType": self.machine_type,
        "name": self.id,
        "image": self.image_url
    }

    http = httplib2.Http()
    http = self.credentials.authorize(http)
    gce = build("compute", "v1beta12", http=http)
    gce.instances().insert(project=GCE_PROJECT_ID,
                           body=request_dict).execute()

    time.sleep(15)  # wait for startup
    # wait for compute instance to become active
    self.wait_for_running()

    # spawn BJ agent via SSH
    compute_instance_details = self.__get_instance_resource()
    logger.debug("Compute Instance Details: "
                 + str(compute_instance_details))
    self.network_ip = compute_instance_details["networkInterfaces"][0][
        "accessConfigs"][0]['natIP']
    url = "ssh://" + str(self.network_ip)
    logger.debug("Connect to: %s" % (url))
    js = saga.job.Service(url)

    # Submit job
    ctx = saga.Context()
    ctx.type = saga.Context.SSH
    ctx.userid = self.pilot_compute_description["vm_ssh_username"]
    ctx.userkey = self.pilot_compute_description["vm_ssh_keyfile"]
    js.session.contexts = [ctx]

    job = js.create_job(self.job_description)

    print("Submit pilot job to: " + str(url))
    TRIAL_MAX = 15
    trials = 0
    while trials < TRIAL_MAX:
        try:
            logger.debug("Attempt: %d, submit pilot job to: %s "
                         % (trials, str(url)))
            job.run()
            break
        except Exception as ex:
            # 'except Exception' (not a bare except) so Ctrl-C and
            # SystemExit still abort the retry loop; the cause is
            # logged instead of being dropped silently
            logger.warning("Submission failed: " + str(ex))
            trials = trials + 1
            time.sleep(10)
            if trials == TRIAL_MAX:
                raise Exception("Submission of agent failed.")

    logger.debug("Job State : %s" % (job.get_state()))
    print("Job State : %s" % (job.get_state()))
def main():
    """Run '/bin/echo' on the 'india' PBS cluster using a custom session.

    The SSH context is appended to a dedicated session which is passed
    to the job service. Job id/state are printed around submission,
    state and exit code after completion. On saga.Exception a message is
    printed and the process exits with status -1.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    try:
        # optional SSH security context: only the remote user name is set
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner1'  # like 'ssh username@host ...'

        # optional: custom session carrying the context
        custom_session = saga.Session()
        custom_session.contexts.append(ssh_ctx)

        # job service bound to the custom session
        service = saga.job.Service("pbs+ssh://india.futuregrid.org",
                                   session=custom_session)

        desc = saga.job.Description()

        # resource requirements
        desc.wall_time_limit = 5  # minutes
        desc.total_cpu_count = 1

        # environment, executable & arguments
        desc.environment = {'HELLO': "\"Hello SAGA\""}
        desc.executable = '/bin/echo'
        desc.arguments = ['$HELLO']

        # output options
        desc.output = "bliss_pbssh_job.stdout"
        desc.error = "bliss_pbssh_job.stderr"

        # create the job (state: New)
        job = service.create_job(desc)
        show_job(job)

        print("\n...starting job...\n")
        job.run()
        show_job(job)

        print("\n...waiting for job...\n")
        job.wait()
        print("Job State : %s" % (job.get_state()))
        print("Exitcode : %s" % (job.exitcode))

    except saga.Exception as err:
        print("An error occured during job execution: %s" % (str(err)))
        sys.exit(-1)
print "Job #%s output copied to local machine: %s (%s bytes)" \ % (jobno, local_file, basedir.get_size(workdir+'/bfast.out')) return diff except saga.Exception, ex: print "An error occured: %s" % (str(ex)) sys.exit(-1) if __name__ == "__main__": NUMJOBS = 32 execution_host = saga.Url("pbs+ssh://queenbee.loni.org") ctx = saga.Context() ctx.type = saga.Context.SSH ctx.userid = 'oweidner' # like 'ssh username@host ...' ctx.userkey = '/Users/s1063117/.ssh/id_rsa' # like ssh -i ...' session = saga.Session() session.contexts.append(ctx) js = saga.job.Service(execution_host, session) print "\n-------------------------------------" print "Submitting %s jobs sequentially" % NUMJOBS total_time = 0.0 for i in range (0, NUMJOBS):
def run(url, username, queue, project):
    """Tests if a plug-in can handle both, username as part of the URL
       (i.e., oweidner@host) and username as part of a context.

    Job 1 is submitted through a service created from *url* with the
    user name supplied by an SSH context. Job 2 is submitted through a
    second service whose URL carries the user name. The outcome is
    recorded in the locals 'failed' and 'why'.
    """

    def show_job(job):
        print("Job ID : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

    def run_and_wait(job):
        # print, submit, print, wait, print -- identical for both jobs
        show_job(job)
        print("\n...starting job...\n")
        job.run()
        show_job(job)
        print("\n...waiting for job...\n")
        job.wait()
        print("Job State : %s" % (job.get_state()))
        print("Exitcode : %s" % (job.exitcode))

    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = username  # like 'ssh username@host ...'

        js = saga.job.Service(url)
        js.session.contexts.append(ctx)

        # describe our job
        jd = saga.job.Description()
        jd.queue = queue
        jd.project = project
        jd.wall_time_limit = 5  # minutes

        # environment, executable & arguments
        jd.environment = {'MYOUTPUT': '"Hello from Bliss"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']

        # output options
        jd.output = "bliss_job.01b.stdout"
        jd.error = "bliss_job.01b.stderr"

        # job 1: user name comes from the context
        myjob = js.create_job(jd)
        run_and_wait(myjob)

        failed = False
        why = ""
        if myjob.get_state() != saga.job.Job.Done:
            failed = True
            why = "Job 1 returned in state 'Failed'."

        # job 2: user name is part of the URL
        url2 = saga.Url(url)
        url2.username = username
        js2 = saga.job.Service(url2)

        # must be created on js2; creating it on js would just repeat
        # job 1 and leave the URL-username path untested
        myjob2 = js2.create_job(jd)
        run_and_wait(myjob2)

        # do not reset failed/why here: a failure of job 1 has to survive
        if myjob2.get_state() != saga.job.Job.Done:
            failed = True
            why = (why + " Job 2 returned in state 'Failed'.").strip()

    except saga.Exception as ex:
        failed = True
        why = str(ex)
def main():
    """Acquire a 64-core compute resource, derive a job service, release it.

    A resource manager is created for 'pbsbigjob+ssh' on alamo with an
    SSH context attached. A job description is prepared but no job is
    created or submitted (that part is disabled in this example). A
    saga.Exception is reported on stdout.
    """
    try:
        # SSH security context
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

        # resource manager with the context attached to its session
        manager = saga.resource.Manager(
            "pbsbigjob+ssh://alamo.futuregrid.org")
        manager.session.contexts.append(ssh_ctx)

        # compute resource with 64 cores
        compute_desc = saga.resource.ComputeDescription()
        compute_desc.cores = '64'

        # create it and block until it reaches 'Active'
        compute = manager.create_compute(compute_desc)
        compute.wait(saga.resource.State.Active)

        # job service backed by the compute resource
        service = saga.job.Service.from_compute(compute)

        desc = saga.job.Description()

        # resource requirements
        desc.wall_time_limit = "0:05:00"
        desc.total_cpu_count = 1

        # environment, executable & arguments
        desc.environment = {'SLEEP_TIME': '10'}
        desc.executable = '/bin/sleep'
        desc.arguments = ['$SLEEP_TIME']

        # output options
        desc.output = "bliss_pbssh_job.stdout"
        desc.error = "bliss_pbssh_job.stderr"

        # (job creation, run() and wait() are not performed here)

        # finally, release the compute resource
        manager.release_compute(compute)

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def main():
    """Discover services on several clusters, then run 100 jobs on 'india'.

    For each machine the discovered services' metrics are printed and
    stored, and a job service is created. All 100 jobs share one job
    description and are submitted through the 'india' job service;
    afterwards every job's id and state are printed. A saga.Exception is
    reported on stdout.
    """

    def machine_entry(endpoint):
        return {'url': endpoint, 'metrics': None, 'jobservice': None}

    try:
        # resources that are potentially available
        machines = {
            'xray': machine_entry('pbs+ssh://xray.futuregrid.org'),
            'india': machine_entry('pbs+ssh://india.futuregrid.org'),
            'alamo': machine_entry('pbs+ssh://alamo.futuregrid.org'),
            'louie': machine_entry('pbs+ssh://louie.loni.org'),
            'queenbee': machine_entry('pbs+ssh://queenbee.loni.org'),
        }

        # one description for all jobs; not yet bound to a resource manager
        desc = saga.job.Description()
        desc.wall_time_limit = "0:05:00"
        desc.total_cpu_count = 1
        desc.executable = "/bin/sleep"
        desc.arguments = ["10"]

        jobs = [{'jd': desc, 'jobj': None} for _ in range(100)]

        # SSH security context shared by all discoverers
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

        # collect information about every machine in the list
        for name in machines:
            entry = machines[name]
            print("\nResource: %s" % (name))

            # discoverer for this endpoint, then the list of services
            discoverer = saga.sd.Discoverer(entry['url'])
            discoverer.session.contexts.append(ssh_ctx)

            for service in discoverer.list_services():
                # the metrics of the service seen last are what stays
                # stored on the machine entry
                entry['metrics'] = service.get_data()
                data = entry['metrics']
                print(" * Serivce: '%s', type: '%s', url: '%s'"
                      % (service.name, service.type, service.url))
                print(" |- Running Jobs : %s"
                      % (data.get_attribute("GlueCEStateRunningJobs")))
                print(" |- Waiting Jobs : %s"
                      % (data.get_attribute("GlueCEStateWaitingJobs")))
                print(" |- Total CPUs : %s"
                      % (data.get_attribute("GlueSubClusterPhysicalCPUs")))
                print(" |- Free CPUs : %s"
                      % (data.get_attribute("GlueCEStateFreeCPUs")))
                print(" '- CPUs per Node : %s"
                      % (data.get_attribute("GlueHostArchitectureSMPSize")))

            # NOTE(review): assumed to run once per machine, after the
            # service loop; the collapsed source does not show the
            # nesting -- confirm
            entry['jobservice'] = saga.job.Service(entry['url'])
            print(" * Job service up and waiting for jobs...")

        # submission strategy used here: everything goes to 'india'
        for slot in jobs:
            slot['jobj'] = machines['india']['jobservice'].create_job(
                slot['jd'])
            slot['jobj'].run()

        for slot in jobs:
            state = slot['jobj'].get_state()
            job_id = slot['jobj'].get_job_id()
            print("Job %s state: %s" % (job_id, state))

    except saga.Exception as err:
        print("Oh, snap! An error occured: %s" % (str(err)))
def run(url1, url2, username, queue, project):
    """Test if we can execute a remote bash script via 'bash -c'

    Submits the same '/bin/echo $MYOUTPUT' job to the job services at
    *url1* and *url2* as *username*, using *queue* and *project*, and
    waits for both.

    Returns:
        -1 after printing a failure report, if a saga.Exception occurred
        or either job did not end in state 'Done'; None otherwise.
    """
    failed = False
    why = ""

    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = username  # like 'ssh username@host ...'

        js1 = saga.job.Service(url1)
        js2 = saga.job.Service(url2)
        js1.session.contexts.append(ctx)
        js2.session.contexts.append(ctx)

        # describe our job
        jd = saga.job.Description()
        jd.queue = queue
        jd.project = project
        jd.wall_time_limit = 2  # minutes

        # environment, executable & arguments
        jd.environment = {'MYOUTPUT': '"Hello from Bliss"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']

        # output options
        jd.output = "bliss_job.09.stdout"
        jd.error = "bliss_job.09.stderr"

        # create the jobs (state: New)
        myjob1 = js1.create_job(jd)
        myjob2 = js2.create_job(jd)

        print("Job 1 ID : %s" % (myjob1.jobid))
        print("Job 1 State : %s" % (myjob1.get_state()))
        print("Job 2 ID : %s" % (myjob2.jobid))
        print("Job 2 State : %s" % (myjob2.get_state()))

        print("\n...starting job...\n")
        # run the jobs (submit them to PBS)
        myjob1.run()
        myjob2.run()

        print("Job 1 ID : %s" % (myjob1.jobid))
        print("Job 1 State : %s" % (myjob1.get_state()))
        print("Job 2 ID : %s" % (myjob2.jobid))
        print("Job 2 State : %s" % (myjob2.get_state()))

        print("\n...waiting for job...\n")
        # wait for the jobs to either finish or fail
        myjob1.wait()
        myjob2.wait()

        print("Job 1 State : %s" % (myjob1.get_state()))
        print("Job 1 Exitcode : %s" % (myjob1.exitcode))
        print("Job 2 State : %s" % (myjob2.get_state()))
        print("Job 2 Exitcode : %s" % (myjob2.exitcode))

        # The original did 'raise (why)' here. Raising a str is itself a
        # TypeError, which is not a saga.Exception, so the report below
        # was never printed. Record the failures and report them after
        # the try block instead.
        if myjob1.get_state() != saga.job.Job.Done:
            failed = True
            why += "Job 1 returned in state 'Failed'. "
        if myjob2.get_state() != saga.job.Job.Done:
            failed = True
            why += "Job 2 returned in state 'Failed'. "

    except saga.Exception as ex:
        failed = True
        why = str(ex)

    if failed:
        print("")
        print("============================================")
        print("The job seems to have FAILED!")
        print("============================================")
        print(" ")
        print("%s" % (why))
        print("Please run this test again with SAGA_VERBOSE=5 ")
        print("and report the results at: ")
        print("")
        print("https://github.com/saga-project/bliss/issues\n")
        return -1
def run(self):
    """ Start VM and start BJ agent via SSH on VM

    Map fields of Pilot description to EC2 API
        { "vm_id":"ami-d7f742be",
          "vm_ssh_username":"******",
          "vm_ssh_keyname":"MyKey",
          "vm_ssh_keyfile":"<path>",
          "vm_type":"t1.micro",
          "access_key_id":"xxx",
          "secret_access_key":"xxx"
        }

    Submission of the pilot job is retried up to TRIAL_MAX times with a
    30 second pause after each failed attempt.

    Raises:
        Exception: if every submission attempt failed.
    """
    reservation = self.ec2_conn.run_instances(
        self.pilot_compute_description["vm_id"],
        key_name=self.pilot_compute_description["vm_ssh_keyname"],
        instance_type=self.pilot_compute_description["vm_type"],
        security_groups=[SECURITY_GROUP])

    self.instance = reservation.instances[0]
    self.instance_id = self.instance.id
    logger.debug("Started EC2/Eucalyptus/Nova instance: %s"
                 % self.instance_id)
    time.sleep(5)
    self.wait_for_running()

    # the instance is only tagged when the scheme is neither euca+ssh
    # nor nova+ssh
    if self.resource_url.scheme not in ("euca+ssh", "nova+ssh"):
        self.ec2_conn.create_tags([self.instance_id], {"Name": self.id})

    self.network_ip = self.instance.ip_address
    url = "ssh://" + str(self.network_ip)
    logger.debug("Connect to: %s" % (url))
    js = saga.job.Service(url)

    # Submit job
    ctx = saga.Context()
    ctx.type = saga.Context.SSH
    ctx.userid = self.pilot_compute_description["vm_ssh_username"]
    ctx.userkey = self.pilot_compute_description["vm_ssh_keyfile"]
    js.session.contexts = [ctx]

    logger.debug("Job Description Type: "
                 + str(type(self.job_description)))
    job = js.create_job(self.job_description)

    TRIAL_MAX = 30
    trials = 0
    while trials < TRIAL_MAX:
        try:
            logger.debug("Attempt: %d, submit pilot job to: %s "
                         % (trials, str(url)))
            job.run()
            break
        except Exception as exc_value:
            # 'except Exception' (not a bare except) so Ctrl-C and
            # SystemExit still abort the retry loop
            logger.warning("Submission failed: " + str(exc_value))
            trials = trials + 1
            time.sleep(30)
            if trials == TRIAL_MAX:
                raise Exception("Submission of agent failed.")

    logger.debug("Job State : %s" % (job.get_state()))