Ejemplo n.º 1
0
    def test_container(self):
        """Add a freshly created job to a job container and remove it again.

        Sets up an SSH security context and a PBS job service, describes a
        short '/bin/sleep' job and creates it, then calls ``container.add()``
        followed by ``container.remove()`` on a new ``saga.job.Container``.
        The job itself is never submitted.
        """
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

        # create a job service for Futuregrid's 'india' PBS cluster
        # and attach the SSH security context to it
        js = saga.job.Service("pbs+ssh://india.futuregrid.org")
        js.session.contexts.append(ctx)

        # describe our job
        jd = saga.job.Description()
        # resource requirements; the attribute is spelled 'wall_time_limit'
        # (the former 'walltime_limit' presumably never set the limit)
        jd.wall_time_limit = "0:05:00"
        jd.total_cpu_count = 1
        # environment, executable & arguments
        jd.environment = {'SLEEP_TIME': '10'}
        jd.executable = '/bin/sleep'
        jd.arguments = ['$SLEEP_TIME']
        # output options
        jd.output = "bliss_pbssh_job.stdout"
        jd.error = "bliss_pbssh_job.stderr"

        # create a new job container
        container = saga.job.Container(js)

        # create the job (state: New) and move it in and out of the container
        myjob = js.create_job(jd)
        container.add(myjob)
        container.remove(myjob)
Ejemplo n.º 2
0
def main():
    """Submit a short '/bin/sleep' job via 'pbs+gsissh' and wait for it.

    An X509 proxy context is attached to the job service's session. The
    job ID and state are printed after creation and after submission, the
    state and exit code once waiting has returned. A saga.Exception is
    reported on stdout and ends the process with status -1.
    """
    try:
        # security context holding the X509 proxy credentials
        proxy = saga.Context()
        proxy.type = saga.Context.X509
        proxy.userproxy = '/tmp/x509up_u501_ncsa'

        # job service for the 'kraken' gsissh endpoint, with the
        # security context attached to its session
        service = saga.job.Service(
            "pbs+gsissh://gsissh.kraken.nics.xsede.org")
        service.session.contexts.append(proxy)

        # job description: project, queue and resource limits
        description = saga.job.Description()
        description.project = "TG-MCB090174"
        description.queue = "small"
        description.wall_time_limit = 5  # minutes

        # environment, executable & arguments
        description.environment = {'SLEEP_TIME': '10'}
        description.executable = '/bin/sleep'
        description.arguments = ['$SLEEP_TIME']

        # files receiving the job's stdout / stderr
        description.output = "bliss_sgessh_job.stdout"
        description.error = "bliss_sgessh_job.stderr"

        # create the job (state: New)
        job = service.create_job(description)
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...starting job...\n")
        job.run()  # submit the job
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...waiting for job...\n")
        job.wait()  # wait for the job to either finish or fail
        print("Job State : %s" % job.get_state())
        print("Exitcode  : %s" % job.exitcode)

    except saga.Exception as ex:
        print("An error occured during job execution: %s" % str(ex))
        sys.exit(-1)
Ejemplo n.º 3
0
def main():
    """Run a 'bfast match' job on the 'india' PBS cluster and wait for it.

    The job's stdout/stderr file names carry the value of ``time.time()``
    taken while the description is built. Job ID, state and exit code are
    printed along the way; a saga.Exception is only reported on stdout.
    """
    try:
        # SSH security context (user name only)
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'

        # job service for Futuregrid's 'india' PBS cluster with the
        # SSH context attached to its session
        service = saga.job.Service("pbs+ssh://india.futuregrid.org")
        service.session.contexts.append(ssh_ctx)

        # resource requirements
        description = saga.job.Description()
        description.wall_time_limit = "0:05:00"
        description.total_cpu_count = 1

        # environment, working directory, executable & arguments
        description.environment = {'BFAST_DIR': '/N/u/oweidner/bfast'}
        description.working_directory = '/N/u/oweidner/bfast/tmp/'
        description.executable = '$BFAST_DIR/bin/bfast'
        description.arguments = [
            'match',
            '-A 1',
            '-r $BFAST_DIR/data/small/reads_5K/reads.10.fastq',
            '-f $BFAST_DIR/data/small/reference/hg_2122.fa',
        ]

        # output files, tagged with the current timestamp
        stamp = time.time()
        description.output = "bfast_match_%s.stdout" % stamp
        description.error = "bfast_match_%s.stderr" % stamp

        # create the job (state: New)
        job = service.create_job(description)
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...starting job...\n")
        job.run()  # submit the job to PBS
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...waiting for job...\n")
        job.wait()  # wait for the job to either finish or fail
        print("Job State : %s" % job.get_state())
        print("Exitcode  : %s" % job.exitcode)

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % str(ex))
Ejemplo n.º 4
0
def run(url, outprefix, username, queue, project):
    """Submit a '/bin/echo $MYOUTPUT' job to *url* and wait for it.

    *username* goes into an SSH context attached to the job service,
    *queue* and *project* into the job description, and *outprefix* is the
    directory prefix of the stdout/stderr file names. The outcome is
    recorded in the locals ``failed`` / ``why``; nothing is returned.
    """
    try:
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = username  # like 'ssh username@host ...'

        service = saga.job.Service(url)
        service.session.contexts.append(ssh_ctx)

        # queue, project and wall time
        description = saga.job.Description()
        description.queue = queue
        description.project = project
        description.wall_time_limit = 5  # minutes

        # working directory, environment, executable & arguments
        description.working_directory = "/tmp"
        description.environment = {'MYOUTPUT': '"Hello from Bliss"'}
        description.executable = '/bin/echo'
        description.arguments = ['$MYOUTPUT']

        # output files below the caller-supplied prefix
        description.output = "%s/bliss_job.08.stdout" % outprefix
        description.error = "%s/bliss_job.08.stderr" % outprefix

        # create the job (state: New)
        job = service.create_job(description)
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...starting job...\n")
        job.run()  # submit the job
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...waiting for job...\n")
        job.wait()  # wait for the job to either finish or fail
        print("Job State : %s" % job.get_state())
        print("Exitcode  : %s" % job.exitcode)

        # anything but 'Done' counts as a failure
        if job.get_state() != saga.job.Job.Done:
            failed = True
            why = "Job returned in state 'Failed'."
        else:
            failed = False
            why = ""

    except saga.Exception as ex:
        failed = True
        why = str(ex)
Ejemplo n.º 5
0
    def test_context_type_EC2(self):
        """An EC2 context must refuse a 'userkey' naming a missing file.

        The first assignment is expected to go through; the second one has
        to raise saga.Exception, otherwise the test fails.
        """
        ec2_ctx = saga.Context()
        ec2_ctx.type = saga.Context.EC2
        ec2_ctx.userkey = "/tmp/bliss-test.file1"
        try:
            ec2_ctx.userkey = "non_existing_file_2345435"
        except saga.Exception:
            # expected: the bogus path was rejected
            pass
        else:
            self.fail("'userkey' shouldn't accept a non-exsisting file'")
Ejemplo n.º 6
0
def main():
    """Run a '/bin/sleep 10' job through a bigjob service and wait for it.

    A BigJob context with user id and password is placed in a dedicated
    session that is handed to the job service. Job ID, state and exit code
    are printed; a saga.Exception is only reported on stdout.
    """
    try:
        # log-in credentials for the bigjob service
        credentials = saga.Context()
        credentials.type = saga.Context.BigJob
        credentials.userid = 'oweidner'
        credentials.userpass = '******'

        session = saga.Session()
        session.contexts.append(credentials)

        # job service connected to the bigjob server
        endpoint = "bigjob://engage-submit3.renci.org:28082/engage.fork.test"
        service = saga.job.Service(endpoint, session=session)

        # resource requirements
        description = saga.job.Description()
        description.wall_time_limit = "0:05:00"
        description.number_of_processes = 1

        # executable & arguments
        description.executable = '/bin/sleep'
        description.arguments = ['10']

        # output options
        description.output = "bigjob_via_saga_api.stdout"
        description.error = "bigjob_via_saga_api.stderr"

        # create the job (state: New)
        job = service.create_job(description)
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...starting job...\n")
        job.run()  # submit the job
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...waiting for job...\n")
        job.wait()  # wait for the job to either finish or fail
        print("Job State : %s" % job.get_state())
        print("Exitcode  : %s" % job.exitcode)

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % str(ex))
Ejemplo n.º 7
0
def main():
    """Print service-discovery metrics for a fixed set of PBS resources.

    For every resource a ``saga.sd.Discoverer`` is created with the SSH
    context attached; for each service it lists, a handful of Glue
    attributes are read from the service data and printed. A
    saga.Exception aborts the listing and is reported on stdout.
    """
    try:
        # resources that are potentially available, by short name
        machines = {
            'xray': 'pbs+ssh://xray.futuregrid.org',
            'india': 'pbs+ssh://india.futuregrid.org',
            'alamo': 'pbs+ssh://alamo.futuregrid.org',
            'louie': 'pbs+ssh://louie.loni.org',
            'queenbee': 'pbs+ssh://queenbee.loni.org'
        }

        # SSH security context shared by all discoverers
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

        for name, resource_url in machines.items():
            print("\nResource: %s" % name)

            # discoverer for this resource and the services it knows
            discoverer = saga.sd.Discoverer(resource_url)
            discoverer.session.contexts.append(ssh_ctx)

            for service in discoverer.list_services():
                # key metrics come from the service data object
                data = service.get_data()

                print("  * Serivce: '%s', type: '%s', url: '%s'"
                      % (service.name, service.type, service.url))
                print("    |- Running Jobs         : %s"
                      % data.get_attribute("GlueCEStateRunningJobs"))
                print("    |- Waiting Jobs         : %s"
                      % data.get_attribute("GlueCEStateWaitingJobs"))
                # raw value is divided by 1048576 for the 'GB' figure
                ram = float(data.get_attribute("GlueHostMainMemoryRAMSize"))
                print("    |- Memory per Node      : %.2f GB"
                      % (ram / 1048576.0))
                print("    |- Total CPUs           : %s"
                      % data.get_attribute("GlueSubClusterPhysicalCPUs"))
                print("    |- Free CPUs            : %s"
                      % data.get_attribute("GlueCEStateFreeCPUs"))
                print("    '- CPUs per Node        : %s"
                      % data.get_attribute("GlueHostArchitectureSMPSize"))

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % str(ex))
Ejemplo n.º 8
0
def main():
    """Run '/bin/echo' over SSH on localhost and stage out its stdout file.

    An SSH context is placed in a dedicated session that is used for both
    the job service and the later file copy. Job ID, state and exit code
    are printed, then 'myjob.stdout' is copied to '/tmp/' and its size is
    reported. A saga.Exception is reported on stdout and ends the process
    with status -1.
    """
    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = 'oweidner'  # your identity on the remote machine

        ses = saga.Session()
        ses.contexts.append(ctx)

        # create a job service for localhost; hand it the session so the
        # SSH context above is used for the job (it used to be passed
        # only to the file transfer below)
        js = saga.job.Service("ssh://localhost", session=ses)

        # describe our job
        jd = saga.job.Description()
        jd.environment = {'MYOUTPUT': '"Hello from SAGA"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']
        jd.output = "myjob.stdout"
        jd.error = "myjob.stderr"

        # create the job (state: New)
        myjob = js.create_job(jd)

        print("Job ID    : %s" % myjob.jobid)
        print("Job State : %s" % myjob.get_state())

        print("\n...starting job...\n")
        # run the job
        myjob.run()

        print("Job ID    : %s" % myjob.jobid)
        print("Job State : %s" % myjob.get_state())

        print("\n...waiting for job...\n")
        # wait for the job to either finish or fail
        myjob.wait()

        print("Job State : %s" % myjob.get_state())
        print("Exitcode  : %s" % myjob.exitcode)

        # stage the job's stdout file out to the local /tmp/
        outfilesource = 'sftp://localhost/Users/oweidner/myjob.stdout'
        outfiletarget = 'file://localhost/tmp/'
        out = saga.filesystem.File(outfilesource, session=ses)
        out.copy(outfiletarget)

        print("Staged out %s to %s (size: %s bytes)" % (
            outfilesource, outfiletarget, out.get_size()))

    except saga.Exception as ex:
        print("An error occured during job execution: %s" % str(ex))
        sys.exit(-1)
Ejemplo n.º 9
0
def main():
    """Run a short '/bin/sleep' job on the 'india' PBS cluster and wait.

    An SSH context with user name and certificate path is attached to the
    job service's session. Job ID, state and exit code are printed; a
    saga.Exception is only reported on stdout.
    """
    try:
        # SSH security context
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like 'ssh -i ...'

        # job service for Futuregrid's 'india' PBS cluster with the
        # SSH context attached to its session
        service = saga.job.Service("pbs+ssh://india.futuregrid.org")
        service.session.contexts.append(ssh_ctx)

        # resource requirements
        description = saga.job.Description()
        description.wall_time_limit = "0:05:00"
        description.total_cpu_count = 1

        # environment, executable & arguments
        description.environment = {'SLEEP_TIME': '10'}
        description.executable = '/bin/sleep'
        description.arguments = ['$SLEEP_TIME']

        # output options
        description.output = "bliss_pbssh_job.stdout"
        description.error = "bliss_pbssh_job.stderr"

        # create the job (state: New)
        job = service.create_job(description)
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...starting job...\n")
        job.run()  # submit the job to PBS
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...waiting for job...\n")
        job.wait()  # wait for the job to either finish or fail
        print("Job State : %s" % job.get_state())
        print("Exitcode  : %s" % job.exitcode)

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % str(ex))
Ejemplo n.º 10
0
def main():
    """Exercise the 'fork' job service: create, list, cancel, rerun, wait.

    Lists the attributes of a job description, starts ten jobs, and then
    walks ``js.list()`` four times (look-up by id, cancel, run, wait),
    printing "<job id> : <state>" for every job touched. A saga.Exception
    is printed and otherwise ignored.
    """
    def show(some_job):
        # one "<id> : <state>" line per job
        print(some_job.get_job_id() + " : " + some_job.get_state())

    try:
        key_ctx = saga.Context()
        key_ctx.type = saga.Context.SSH
        key_ctx.usercert = "/Users/s1063117/id_rsa.pub"
        key_ctx.userkey = "/Users/s1063117/id_rsa"

        service = saga.job.Service("fork://localhost")
        # the context is registered through both session interfaces
        service.get_session().add_context(key_ctx)
        service.get_session().contexts.append(key_ctx)

        description = saga.job.Description()
        description.set_attribute('Executable', '/bin/sleep')
        description.set_vector_attribute('Arguments', ["10"])

        # constructed but not used afterwards
        unused_url = saga.Url("fork://localhost:8080")
        # overrides the 'Arguments' vector set above
        description.arguments = ['4']

        for attribute in description.list_attributes():
            print("%s %s %s %s" % (
                attribute,
                description.attribute_is_vector(attribute),
                description.attribute_is_readonly(attribute),
                description.attribute_is_writeable(attribute)))

        # start ten jobs
        for _ in range(10):
            job = service.create_job(description)
            job.run()
            show(job)

        time.sleep(2)

        # look every listed job up again by its id
        for job in service.list():
            show(service.get_job(job.get_job_id()))

        for job in service.list():
            job.cancel()
            show(job)

        for job in service.list():
            job.run()
            show(job)

        for job in service.list():
            job.wait()
            show(job)

    except saga.Exception as ex:
        print(str(ex))
Ejemplo n.º 11
0
    def test_session(self):
        """Check default-session sharing and explicit sessions of job services.

        Services created without a session must report the shared default
        session, a service created with its own session must not, context
        add/remove must be reflected by ``list_contexts()``, and two
        services with distinct sessions must report different sessions.
        """
        def default_session():
            # looked up anew for every comparison
            return saga.Object._Object__shared_state["default_session"]

        first = saga.job.Service("fork://localhost")
        second = saga.job.Service("fork://localhost")

        if first.get_session() != second.get_session():
            self.fail("Both objects should return the same (default) session")

        if first.get_session() != default_session():
            self.fail("Object should return default session")

        if second.get_session() != default_session():
            self.fail("Object should return default session")

        # a further service without explicit session
        first = saga.job.Service("fork://localhost")
        if first.get_session() != default_session():
            self.fail("Object should return default session")

        # a service with its own session
        first = saga.job.Service("fork://localhost", session=saga.Session())
        if first.get_session() == default_session():
            self.fail("Object should not return default session")

        # adding and removing a context
        context = saga.Context()
        session_a = saga.Session()
        session_a.add_context(context)
        if len(session_a.list_contexts()) != 1:
            self.fail("Context list length should be 1")

        session_a.remove_context(context)
        if len(session_a.list_contexts()) != 0:
            self.fail("Context list length should be 0")

        # two services bound to two distinct sessions
        session_b = saga.Session()
        service_a = saga.job.Service("fork://localhost", session=session_a)
        service_b = saga.job.Service("fork://localhost", session=session_b)
        if service_a.get_session() == service_b.get_session():
            self.fail("Sessions shouldn't be identical")
Ejemplo n.º 12
0
def main():
    """Run 16 identical '/bin/sleep' jobs on 'alamo' via a job container.

    All jobs are created from one description, added to a
    ``saga.job.Container``, started together and waited for with
    ``WaitMode.All``; afterwards ID and state of every job are printed.
    A saga.Exception is only reported on stdout.
    """
    try:
        # SSH security context (user name only)
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'

        # job service for Futuregrid's 'alamo' PBS cluster with the
        # SSH context attached to its session
        service = saga.job.Service("pbs+ssh://alamo.futuregrid.org")
        service.session.contexts.append(ssh_ctx)

        # resource requirements
        description = saga.job.Description()
        description.wall_time_limit = "0:05:00"
        description.total_cpu_count = 1

        # environment, executable & arguments
        description.environment = {'SLEEP_TIME': '10'}
        description.executable = '/bin/sleep'
        description.arguments = ['$SLEEP_TIME']

        # fill a new container with 16 jobs
        container = saga.job.Container(service)
        for _ in range(16):
            new_job = service.create_job(description)
            container.add(new_job)

        print("\n...starting jobs...\n")
        container.run()

        print("\n...waiting for jobs...\n")
        container.wait(saga.job.WaitMode.All)

        for job in container.list():
            print("Job ID %s (State: %s)" % (job.jobid, job.get_state()))

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % str(ex))
Ejemplo n.º 13
0
def main():
    """Run '/bin/echo $MYOUTPUT' on the 'india' PBS cluster and wait for it.

    An SSH context is placed in a dedicated session which is handed to the
    job service. Job ID, state and exit code are printed. A saga.Exception
    is reported on stdout and ends the process with status -1.
    """
    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = 'oweidner'  # your identity on the remote machine

        ses = saga.Session()
        ses.contexts.append(ctx)

        # create a job service for 'india'; the session must be passed in,
        # otherwise the SSH context prepared above is never used
        js = saga.job.Service("pbs+ssh://india.futuregrid.org", session=ses)

        # describe our job
        jd = saga.job.Description()
        jd.environment = {'MYOUTPUT': '"Hello from SAGA"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']
        jd.output = "my1stjob.stdout"
        jd.error = "my1stjob.stderr"

        # create the job (state: New)
        myjob = js.create_job(jd)

        print("Job ID    : %s" % myjob.jobid)
        print("Job State : %s" % myjob.get_state())

        print("\n...starting job...\n")
        # run the job
        myjob.run()

        print("Job ID    : %s" % myjob.jobid)
        print("Job State : %s" % myjob.get_state())

        print("\n...waiting for job...\n")
        # wait for the job to either finish or fail
        myjob.wait()

        print("Job State : %s" % myjob.get_state())
        print("Exitcode  : %s" % myjob.exitcode)

    except saga.Exception as ex:
        print("An error occured during job execution: %s" % str(ex))
        sys.exit(-1)
Ejemplo n.º 14
0
def run(url, username):
    """List the directory at *url* and print every entry.

    *username* goes into an SSH context held by a dedicated session that is
    used to open the directory. A saga.Exception is recorded in the locals
    ``failed`` / ``why``; nothing is returned.
    """
    try:
        failed = False

        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = username  # like 'ssh username@host ...'

        session = saga.Session()
        session.contexts.append(ssh_ctx)

        directory = saga.filesystem.Directory(url, session=session)
        for item in directory.list():
            print(item)

    except saga.Exception as ex:
        failed = True
        why = str(ex)
Ejemplo n.º 15
0
def main():
    """Exercise session and context handling of 'fork' job services.

    Prints the sessions reported by services and a job description, checks
    with assertions that adding/removing a context changes the context
    list, and finally reads a context back from a service created without
    an explicit session. A saga.Exception is printed and otherwise ignored.
    """
    try:
        service = saga.job.Service("fork://localhost")
        description = saga.job.Description()
        print(repr(service.get_session()))
        print(repr(description.get_session()))

        key_ctx = saga.Context()
        key_ctx.type = saga.Context.SSH
        key_ctx.usercert = "/Users/s1063117/id_rsa.pub"
        key_ctx.userkey = "/Users/s1063117/id_rsa"

        first = saga.Session()
        first.add_context(key_ctx)
        assert len(first.list_contexts()) == 1
        first.remove_context(key_ctx)
        assert len(first.list_contexts()) == 0

        second = saga.Session()

        # one service per explicit session; a fresh description is created
        # next to each of them but not used further
        for explicit in (first, second):
            service = saga.job.Service("fork://localhost", session=explicit)
            description = saga.job.Description()
            print(repr(service.get_session()))

        # the same context is added twice
        first.add_context(key_ctx)
        first.add_context(key_ctx)

        # add the context to the session of a service created without one
        service = saga.job.Service("fork://localhost")
        service.get_session().add_context(key_ctx)

        # a second such service is created; the context is then read back
        # through the session of the first one
        other = saga.job.Service("fork://localhost")
        found = service.get_session().list_contexts()[0]
        assert found.userkey == "/Users/s1063117/id_rsa"
        print(found.type)

    except saga.Exception as ex:
        print(str(ex))
Ejemplo n.º 16
0
def main():
    """List '/tmp' on india.futuregrid.org over SFTP and print the entries.

    An SSH context with user name and certificate path is placed in a
    dedicated session used to open the directory. A saga.Exception is only
    reported on stdout.
    """
    try:
        # SSH security context
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/s1063117/.ssh/id_rsa'  # like 'ssh -i ...'

        session = saga.Session()
        session.contexts.append(ssh_ctx)

        remote_tmp = saga.filesystem.Directory(
            "sftp://india.futuregrid.org/tmp", session=session)
        for item in remote_tmp.list():
            print(item)

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % str(ex))
Ejemplo n.º 17
0
def main():
    """Copy '.bash_history' from a remote home directory to local '/tmp/'.

    The remote directory on queenbee.loni.org is opened over SFTP with a
    session holding an SSH context. A saga.Exception is only reported on
    stdout.
    """
    try:
        # SSH security context
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ssh_ctx.usercert = '/Users/s1063117/.ssh/id_rsa'  # like 'ssh -i ...'

        session = saga.Session()
        session.contexts.append(ssh_ctx)

        # open the home directory on the remote machine
        home = saga.filesystem.Directory(
            "sftp://queenbee.loni.org/home/oweidner", session=session)

        # copy .bash_history to /tmp/ on the local machine
        home.copy('.bash_history', 'sftp://localhost/tmp/')

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % str(ex))
Ejemplo n.º 18
0
    def run(self):
        """Start a compute instance and submit the pilot job to it via SSH.

        Steps, in order:

        1. Build the instance request from ``self.location``,
           ``self.machine_type``, ``self.id`` and ``self.image_url`` and
           insert it through the "compute" / "v1beta12" API client.
        2. Sleep 15 seconds, then call ``self.wait_for_running()``.
        3. Read the instance's NAT IP, store it in ``self.network_ip`` and
           open a SAGA job service on ``ssh://<ip>`` using the SSH user and
           key file from ``self.pilot_compute_description``.
        4. Create a job from ``self.job_description`` and try to run it up
           to 15 times, pausing 10 seconds between attempts.

        Raises:
            Exception: "Submission of agent failed." once all attempts to
                run the job have failed.
        """
        request_dict = {
            "kind": "compute#instance",
            "disks": [{
                "kind": "compute#instanceDisk",
                "type": "PERSISTENT",
                "mode": "READ",
                "deviceName": "reference-genome",
                "source": "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/disks/reference-genome"
            }],
            "networkInterfaces": [{
                "kind": "compute#instanceNetworkInterface",
                "accessConfigs": [{
                    "name": "External NAT",
                    "type": "ONE_TO_ONE_NAT"
                }],
                "network": "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/networks/default"
            }],
            "serviceAccounts": [{
                "kind": "compute#serviceAccount",
                "email": "default",
                "scopes": [
                    "https://www.googleapis.com/auth/userinfo.email",
                    "https://www.googleapis.com/auth/compute",
                    "https://www.googleapis.com/auth/devstorage.full_control"
                ]
            }],
            # e.g. "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/zones/us-east1-a"
            "zone": self.location,
            # e.g. "https://www.googleapis.com/compute/v1beta12/projects/bigjob-pilot/machine-types/n1-standard-1"
            "machineType": self.machine_type,
            "name": self.id,
            "image": self.image_url
        }

        # authorized HTTP client for the compute API
        http = httplib2.Http()
        http = self.credentials.authorize(http)
        gce = build("compute", "v1beta12", http=http)
        gce.instances().insert(project=GCE_PROJECT_ID,
                               body=request_dict).execute()

        time.sleep(15)  # wait for startup
        # wait for compute instance to become active
        self.wait_for_running()

        # spawn BJ agent via SSH, using the instance's NAT IP
        compute_instance_details = self.__get_instance_resource()
        logger.debug("Compute Instance Details: " +
                     str(compute_instance_details))
        self.network_ip = compute_instance_details["networkInterfaces"][0][
            "accessConfigs"][0]['natIP']
        url = "ssh://" + str(self.network_ip)
        logger.debug("Connect to: %s" % (url))
        js = saga.job.Service(url)

        # SSH credentials for the VM replace the session's context list
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = self.pilot_compute_description["vm_ssh_username"]
        ctx.userkey = self.pilot_compute_description["vm_ssh_keyfile"]
        js.session.contexts = [ctx]

        job = js.create_job(self.job_description)
        print("Submit pilot job to: " + str(url))

        # Retry the submission a bounded number of times.
        TRIAL_MAX = 15
        for trials in range(TRIAL_MAX):
            try:
                logger.debug("Attempt: %d, submit pilot job to: %s " %
                             (trials, str(url)))
                job.run()
                break
            except Exception as ex:
                # 'Exception' rather than a bare except, so Ctrl-C and
                # SystemExit are not swallowed by the retry loop
                logger.debug("Attempt: %d failed: %s" % (trials, str(ex)))
                # no point in sleeping after the final attempt
                if trials + 1 < TRIAL_MAX:
                    time.sleep(10)
        else:
            # loop ended without a successful run()
            raise Exception("Submission of agent failed.")

        logger.debug("Job State : %s" % (job.get_state()))

        print("Job State : %s" % (job.get_state()))
Ejemplo n.º 19
0
def main():
    """Run '/bin/echo $HELLO' on the 'india' PBS cluster with a custom session.

    The SSH context lives in a dedicated session that is passed to the job
    service. Job ID, state and exit code are printed. A saga.Exception is
    reported on stdout and ends the process with status -1.
    """
    try:
        # SSH security context (user name only)
        ssh_ctx = saga.Context()
        ssh_ctx.type = saga.Context.SSH
        ssh_ctx.userid = 'oweidner1'  # like 'ssh username@host ...'

        # custom session carrying the security context
        session = saga.Session()
        session.contexts.append(ssh_ctx)

        # job service for Futuregrid's 'india' PBS cluster, bound to
        # the custom session
        service = saga.job.Service(
            "pbs+ssh://india.futuregrid.org", session=session)

        # resource requirements
        description = saga.job.Description()
        description.wall_time_limit = 5  # minutes
        description.total_cpu_count = 1

        # environment, executable & arguments
        description.environment = {'HELLO': "\"Hello SAGA\""}
        description.executable = '/bin/echo'
        description.arguments = ['$HELLO']

        # output options
        description.output = "bliss_pbssh_job.stdout"
        description.error = "bliss_pbssh_job.stderr"

        # create the job (state: New)
        job = service.create_job(description)
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...starting job...\n")
        job.run()  # submit the job to PBS
        print("Job ID    : %s" % job.jobid)
        print("Job State : %s" % job.get_state())

        print("\n...waiting for job...\n")
        job.wait()  # wait for the job to either finish or fail
        print("Job State : %s" % job.get_state())
        print("Exitcode  : %s" % job.exitcode)

    except saga.Exception as ex:
        print("An error occured during job execution: %s" % str(ex))
        sys.exit(-1)
Ejemplo n.º 20
0
        print "Job #%s output copied to local machine: %s (%s bytes)" \
          % (jobno, local_file, basedir.get_size(workdir+'/bfast.out'))

        return diff

    except saga.Exception, ex:
        print "An error occured: %s" % (str(ex))
        sys.exit(-1)


if __name__ == "__main__":

    NUMJOBS = 32

    execution_host = saga.Url("pbs+ssh://queenbee.loni.org") 
    ctx = saga.Context()
    ctx.type = saga.Context.SSH
    ctx.userid  = 'oweidner' # like 'ssh username@host ...'
    ctx.userkey = '/Users/s1063117/.ssh/id_rsa' # like ssh -i ...'

    session = saga.Session()
    session.contexts.append(ctx)

    js = saga.job.Service(execution_host, session)
  
    print "\n-------------------------------------"
    print "Submitting %s jobs sequentially" % NUMJOBS
 
    total_time = 0.0 

    for i in range (0, NUMJOBS):
Ejemplo n.º 21
0
def run(url, username, queue, project):
    """Tests if a plug-in can handle both, username
       as part of the URL (i.e., oweidner@host) and
       username as part of a context.

       The same '/bin/echo' job is submitted twice: first through a
       job service that receives the username via an SSH context, then
       through a second job service whose URL carries the username.

       url      -- job service URL (without a username)
       username -- remote login name
       queue    -- value assigned to the job description's 'queue'
       project  -- value assigned to the job description's 'project'
    """
    failed = False
    why = ""
    reasons = []

    def _submit_and_wait(service, description):
        """Create, run and wait for one job on 'service', printing
           its ID and state along the way. Returns the job object.
        """
        # create the job (state: New)
        job = service.create_job(description)

        print("Job ID    : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

        print("\n...starting job...\n")
        # run the job (submit the job to PBS)
        job.run()

        print("Job ID    : %s" % (job.jobid))
        print("Job State : %s" % (job.get_state()))

        print("\n...waiting for job...\n")
        # wait for the job to either finish or fail
        job.wait()

        print("Job State : %s" % (job.get_state()))
        print("Exitcode  : %s" % (job.exitcode))

        return job

    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = username  # like 'ssh username@host ...'

        # describe our job
        jd = saga.job.Description()

        jd.queue = queue
        jd.project = project
        jd.wall_time_limit = 5  # minutes

        # environment, executable & arguments
        jd.environment = {'MYOUTPUT': '"Hello from Bliss"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']

        # output options
        jd.output = "bliss_job.01b.stdout"
        jd.error = "bliss_job.01b.stderr"

        #######################################################
        # Variant 1: username supplied through the context

        js = saga.job.Service(url)
        js.session.contexts.append(ctx)

        myjob = _submit_and_wait(js, jd)
        if myjob.get_state() != saga.job.Job.Done:
            failed = True
            reasons.append("Job 1 returned in state 'Failed'.")

        #######################################################
        # Variant 2: username supplied as part of the URL

        url2 = saga.Url(url)
        url2.username = username

        js2 = saga.job.Service(url2)
        # The job must be created on js2; creating it on js would
        # merely repeat variant 1 and leave the URL path untested.
        myjob2 = _submit_and_wait(js2, jd)
        if myjob2.get_state() != saga.job.Job.Done:
            failed = True
            reasons.append("Job 2 returned in state 'Failed'.")

        # Keep a job 1 failure instead of resetting it before the
        # second check.
        why = " ".join(reasons)

    except saga.Exception as ex:
        failed = True
        why = str(ex)

    # NOTE(review): nothing visible in this function consumes 'failed'
    # or 'why' after this point; presumably the reporting code is
    # missing from this snippet -- confirm against the full test.
Ejemplo n.º 22
0
def main():
    """Acquire a 64-core compute resource, derive a job service from
    it and release the resource again.

    A job description is prepared as well, but no job is created or
    submitted from it. Any saga.Exception is reported on stdout.
    """
    try:
        # Set up a security context (optional). If no security context
        # is defined, the BigJobSSH plugin will pick up the default set
        # of ssh credentials for the user, i.e., ~/.ssh/id_rsa
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like ssh -i ...'

        # Create a resource manager for Futuregrid's 'alamo' PBS cluster
        # and attach the SSH security context to it
        rm = saga.resource.Manager("pbsbigjob+ssh://alamo.futuregrid.org")
        rm.session.contexts.append(ctx)

        # Next, define a compute resource with 64 cores.
        cdesc = saga.resource.ComputeDescription()
        cdesc.cores = '64'

        # Now we can create a compute resource object from the
        # description and wait for it to reach 'Active' state.
        cr64 = rm.create_compute(cdesc)
        try:
            cr64.wait(saga.resource.State.Active)

            # Create a job service from the compute resource
            js = saga.job.Service.from_compute(cr64)

            # describe our job
            jd = saga.job.Description()
            # resource requirements
            jd.wall_time_limit = "0:05:00"
            jd.total_cpu_count = 1
            # environment, executable & arguments
            jd.environment = {'SLEEP_TIME': '10'}
            jd.executable = '/bin/sleep'
            jd.arguments = ['$SLEEP_TIME']
            # output options
            jd.output = "bliss_pbssh_job.stdout"
            jd.error = "bliss_pbssh_job.stderr"

            # NOTE: the job is only described here. Creating it on
            # 'js', running it and waiting for it is not part of this
            # example yet.
        finally:
            # Release the compute resource on every exit path, so a
            # failure after create_compute() does not leave it
            # allocated.
            rm.release_compute(cr64)

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % (str(ex)))
Ejemplo n.º 23
0
def main():
    """Print load metrics for a set of PBS clusters, then submit 100
    sleep jobs to the 'india' job service and print each job's state.

    For every machine a service discoverer is queried and the metrics
    of each discovered service are printed; afterwards a job service
    is created for that machine. Any saga.Exception is reported on
    stdout.
    """
    try:
        # Resources that are potentially available. 'metrics' and
        # 'jobservice' are filled in while the machines are visited.
        machines = {
            'xray': {'url': 'pbs+ssh://xray.futuregrid.org',
                     'metrics': None, 'jobservice': None},
            'india': {'url': 'pbs+ssh://india.futuregrid.org',
                      'metrics': None, 'jobservice': None},
            'alamo': {'url': 'pbs+ssh://alamo.futuregrid.org',
                      'metrics': None, 'jobservice': None},
            'louie': {'url': 'pbs+ssh://louie.loni.org',
                      'metrics': None, 'jobservice': None},
            'queenbee': {'url': 'pbs+ssh://queenbee.loni.org',
                         'metrics': None, 'jobservice': None}
        }

        # One shared description for all jobs. At this point it is
        # not bound to any resource manager.
        jd = saga.job.Description()
        jd.wall_time_limit = "0:05:00"
        jd.total_cpu_count = 1
        jd.executable = "/bin/sleep"
        jd.arguments = ["10"]  # sleep for 10 seconds

        # 100 job slots; 'jobj' receives the job object on submission.
        jobs = [{'jd': jd, 'jobj': None} for _ in range(100)]

        # Security context. If none were defined, the PBS plugin
        # would pick up the user's default ssh credentials,
        # i.e., ~/.ssh/id_rsa
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = 'oweidner'  # like 'ssh username@host ...'
        ctx.usercert = '/Users/oweidner/.ssh/id_rsa_fg'  # like ssh -i ...'

        # Metrics printed per service: (format string, attribute name).
        metric_rows = (
            ("    |- Running Jobs         : %s", "GlueCEStateRunningJobs"),
            ("    |- Waiting Jobs         : %s", "GlueCEStateWaitingJobs"),
            ("    |- Total CPUs           : %s", "GlueSubClusterPhysicalCPUs"),
            ("    |- Free CPUs            : %s", "GlueCEStateFreeCPUs"),
            ("    '- CPUs per Node        : %s", "GlueHostArchitectureSMPSize"),
        )

        # Collect information about each machine, like the number of
        # waiting jobs, CPU counts, etc.
        for name in machines:
            entry = machines[name]
            print("\nResource: %s" % (name))

            # Ask a discoverer for the services available on this
            # machine (filter: org.ogf.saga.service.job).
            discoverer = saga.sd.Discoverer(entry['url'])
            discoverer.session.contexts.append(ctx)

            for service in discoverer.list_services():
                # Keep the service data object and print key metrics.
                entry['metrics'] = service.get_data()
                data = entry['metrics']

                print("  * Serivce: '%s', type: '%s', url: '%s'"
                      % (service.name, service.type, service.url))
                for fmt, attribute in metric_rows:
                    print(fmt % (data.get_attribute(attribute)))

            # Create a job service for this machine.
            entry['jobservice'] = saga.job.Service(entry['url'])
            print("  * Job service up and waiting for jobs...")

        # With the resource information collected and the job service
        # endpoints instantiated, jobs can be submitted following
        # whatever strategy we want. Here: everything goes to 'india'.
        india_service = machines['india']['jobservice']
        for slot in jobs:
            slot['jobj'] = india_service.create_job(slot['jd'])
            slot['jobj'].run()

        for slot in jobs:
            state = slot['jobj'].get_state()
            job_id = slot['jobj'].get_job_id()
            print("Job %s state: %s" % (job_id, state))

    except saga.Exception as ex:
        print("Oh, snap! An error occured: %s" % (str(ex)))
Ejemplo n.º 24
0
def run(url1, url2, username, queue, project):
    """Run the same '/bin/echo' job on two job services and report
    whether both jobs finished in state 'Done'.

    url1, url2 -- URLs of the two job services
    username   -- remote login name, passed via an SSH context
    queue      -- value assigned to the job description's 'queue'
    project    -- value assigned to the job description's 'project'

    Returns -1 after printing a failure banner if a saga.Exception
    occurs or if either job does not end in state 'Done'. Otherwise
    the function falls through and returns None.
    """
    failed = False
    why = ""

    try:
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = username  # like 'ssh username@host ...'

        js1 = saga.job.Service(url1)
        js2 = saga.job.Service(url2)
        js1.session.contexts.append(ctx)
        js2.session.contexts.append(ctx)

        # describe our job
        jd = saga.job.Description()

        jd.queue = queue
        jd.project = project
        jd.wall_time_limit = 2  # minutes

        # environment, executable & arguments
        jd.environment = {'MYOUTPUT': '"Hello from Bliss"'}
        jd.executable = '/bin/echo'
        jd.arguments = ['$MYOUTPUT']

        # output options
        jd.output = "bliss_job.09.stdout"
        jd.error = "bliss_job.09.stderr"

        # create the job (state: New)
        myjob1 = js1.create_job(jd)
        myjob2 = js2.create_job(jd)

        print("Job 1 ID    : %s" % (myjob1.jobid))
        print("Job 1 State : %s" % (myjob1.get_state()))
        print("Job 2 ID    : %s" % (myjob2.jobid))
        print("Job 2 State : %s" % (myjob2.get_state()))

        print("\n...starting job...\n")
        # run the job (submit the job to PBS)
        myjob1.run()
        myjob2.run()

        print("Job 1 ID    : %s" % (myjob1.jobid))
        print("Job 1 State : %s" % (myjob1.get_state()))
        print("Job 2 ID    : %s" % (myjob2.jobid))
        print("Job 2 State : %s" % (myjob2.get_state()))

        print("\n...waiting for job...\n")
        # wait for the job to either finish or fail
        myjob1.wait()
        myjob2.wait()

        print("Job 1 State : %s" % (myjob1.get_state()))
        print("Job 1 Exitcode  : %s" % (myjob1.exitcode))
        print("Job 2 State : %s" % (myjob2.get_state()))
        print("Job 2 Exitcode  : %s" % (myjob2.exitcode))

        # Record failures instead of raising a plain string: a string
        # is not a valid exception object and would bypass the handler
        # below, so the failure report would never be printed.
        if myjob1.get_state() != saga.job.Job.Done:
            failed = True
            why += "Job 1 returned in state 'Failed'. "

        if myjob2.get_state() != saga.job.Job.Done:
            failed = True
            why += "Job 2 returned in state 'Failed'. "

    except saga.Exception as ex:
        failed = True
        why = str(ex)

    if failed:
        print("")
        print("============================================")
        print("The job seems to have FAILED!")
        print("============================================")
        print("                                            ")
        print("%s" % (why))
        print("Please run this test again with SAGA_VERBOSE=5 ")
        print("and report the results at: ")
        print("")
        print("https://github.com/saga-project/bliss/issues\n")

        return -1
Ejemplo n.º 25
0
    def run(self):
        """ Start VM and start BJ agent via SSH on VM """
        """ Map fields of Pilot description to EC2 API
            { "vm_id":"ami-d7f742be",
              "vm_ssh_username":"******",
              "vm_ssh_keyname":"MyKey",
              "vm_ssh_keyfile":"<path>",
              "vm_type":"t1.micro",
              "access_key_id":"xxx",
              "secret_access_key":"xxx"
            }
        """

        reservation = self.ec2_conn.run_instances(
            self.pilot_compute_description["vm_id"],
            key_name=self.pilot_compute_description["vm_ssh_keyname"],
            instance_type=self.pilot_compute_description["vm_type"],
            security_groups=[SECURITY_GROUP])

        self.instance = reservation.instances[0]
        self.instance_id = self.instance.id
        logger.debug("Started EC2/Eucalyptus/Nova instance: %s" %
                     self.instance_id)
        time.sleep(5)
        self.wait_for_running()

        if self.resource_url.scheme != "euca+ssh" and self.resource_url.scheme != "nova+ssh":
            self.ec2_conn.create_tags([self.instance_id], {"Name": self.id})

        self.network_ip = self.instance.ip_address
        url = "ssh://" + str(self.network_ip)
        logger.debug("Connect to: %s" % (url))
        js = saga.job.Service(url)

        # Submit job
        ctx = saga.Context()
        ctx.type = saga.Context.SSH
        ctx.userid = self.pilot_compute_description["vm_ssh_username"]
        ctx.userkey = self.pilot_compute_description["vm_ssh_keyfile"]
        js.session.contexts = [ctx]

        logger.debug("Job Description Type: " +
                     str(type(self.job_description)))

        job = js.create_job(self.job_description)

        TRIAL_MAX = 30
        trials = 0
        while trials < TRIAL_MAX:
            try:
                logger.debug("Attempt: %d, submit pilot job to: %s " %
                             (trials, str(url)))
                job.run()
                break
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                logger.warning("Submission failed: " + str(exc_value))
                #self.__print_traceback()
                trials = trials + 1
                time.sleep(30)
                if trials == TRIAL_MAX:
                    raise Exception("Submission of agent failed.")

        logger.debug("Job State : %s" % (job.get_state()))