def run_job(self, job_details=None):
        JobDeploymentBase.run_job(self)
        # This function uses the SAGA-Python library to run the job. Separate
        # functionality in the library is used for monitoring the job process.

        # TODO: Add modules to PBS job configuration

        # Here we extract the job details from the previously stored job details
        # object into a SAGA Python job description object so that we can run
        # the job.
        job_arguments = getattr(self.job_config, 'args', [])
        input_files = getattr(self, 'transferred_input_files', [])
        job_arguments += input_files

        jd = saga.job.Description()
        jd.environment = getattr(self.job_config, 'environment', {})
        jd.executable = getattr(self.job_config, 'executable', None)
        jd.arguments = job_arguments
        jd.working_directory = getattr(self.job_config, 'working_dir', None)
        jd.output = getattr(self.job_config, 'stdout', None)
        jd.error = getattr(self.job_config, 'stderr', None)
        jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)
        jd.total_cpu_count = getattr(self.job_config, 'num_processes', 1)
        #jd.processes_per_host = 1
        #jd.total_physical_memory = "2400"

        self.job = self.svc.create_job(jd)
        self.job.run()
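
The service object `self.svc` is assumed to be created elsewhere in the class; for a PBS target it would typically use a `pbs+ssh://` URL. Below is a minimal standalone sketch of submission plus the separate monitoring calls mentioned in the comment above; the host name and executable are placeholders, not taken from the original code:

import saga

# Hypothetical service creation for a PBS cluster; the host name is an
# assumption, not part of the original code.
svc = saga.job.Service('pbs+ssh://cluster.example.org/')

jd = saga.job.Description()
jd.executable = '/bin/date'  # placeholder executable
jd.output = 'std.out'
jd.error = 'std.err'

job = svc.create_job(jd)
job.run()

# Monitoring is separate from submission in SAGA-Python: job.state reports
# the current state and job.wait() blocks until a terminal state
# (DONE, FAILED or CANCELED) is reached.
job.wait()
print('Job %s finished in state %s (exit code %s)' %
      (job.id, job.state, job.exit_code))

svc.close()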
    def run_job(self):
        JobDeploymentBase.run_job(self)
        # This function could use the libhpc resource daemon client to talk
        # to a resource daemon installed on cloud resources; at present it
        # uses SSH (via SAGA-Python) to run the job and monitor its state to
        # see when it is complete.
        # TODO: Should this be running/managing the job remotely via a SAGA
        # SSH session or should we be expecting to communicate with a remote
        # resource management service to handle this?
        LOG.debug('Run job...')

        job_arguments = getattr(self.job_config, 'args', [])
        input_files = getattr(self, 'transferred_input_files', [])
        job_arguments += input_files

        # Check if we have a JOB_ID variable in the arguments or input files.
        # If so, replace this variable with the actual job ID.
        job_arguments_tmp = job_arguments
        job_arguments = []
        for item in job_arguments_tmp:
            # Can't do a replace on items that are not string types!
            if isinstance(item, basestring):
                job_arguments.append(
                    item.replace('$JOB_ID', self.job_config.job_id))
            else:
                job_arguments.append(item)

        LOG.debug('Modified job arguments: %s' % job_arguments)

        jd = saga.job.Description()
        jd.environment = getattr(self.job_config, 'environment', {})
        if self.job_config.num_processes > 1:
            jd.executable = ('mpirun -np %s -machinefile /tmp/machinefile' %
                             (self.job_config.num_processes))
            executable = getattr(self.job_config, 'executable', None)
            if executable:
                job_arguments.insert(0, executable)
        else:
            jd.executable = getattr(self.job_config, 'executable', None)
        jd.arguments = job_arguments
        jd.working_directory = getattr(self.job_config, 'working_dir', None)
        jd.output = getattr(self.job_config, 'stdout', None)
        jd.error = getattr(self.job_config, 'stderr', None)
        jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)
        #jd.number_of_processes = 4
        #jd.processes_per_host = 1
        #jd.total_physical_memory = "2400"

        if not jd.output:
            jd.output = 'std.out'
        if not jd.error:
            jd.error = 'std.err'

        self.svc = saga.job.Service('ssh://%s/' %
                                    self.running_nodes[0][0].public_ips[0],
                                    session=self.session)
        self.job = self.svc.create_job(jd)
        self.job.run()
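
The comment above mentions monitoring job state to detect completion. A hedged sketch of non-blocking polling using SAGA-Python's job state constants, as an alternative to a blocking `job.wait()`; the helper name and poll interval are arbitrary choices, not part of the original code:

import time

import saga

def poll_until_complete(job, interval_secs=5):
    # Poll a SAGA job until it reaches a terminal state; the terminal
    # state constants come from the SAGA-Python job model.
    terminal_states = (saga.job.DONE, saga.job.FAILED, saga.job.CANCELED)
    while job.state not in terminal_states:
        time.sleep(interval_secs)
    return job.state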
Example #3
    def run_job(self):
        JobDeploymentBase.run_job(self)
        # TODO: Should this be running/managing the job remotely via a SAGA
        # SSH session or should we be expecting to communicate with a remote
        # resource management service to handle this?

        # This function could use the libhpc resource daemon client to talk to
        # a resource daemon that is installed on the target resource, however,
        # at present we simply use SSH (via SAGA Python) to trigger job
        # execution and handle compressing and returning the output files.
        LOG.debug('SSH Deployer: Run job...')

        job_arguments = getattr(self.job_config, 'args', [])
        input_files = getattr(self, 'transferred_input_files', [])
        job_arguments += input_files

        # Check if we have a JOB_ID variable in the arguments or input files.
        # If so, replace this variable with the actual job ID.
        job_arguments_tmp = job_arguments
        job_arguments = []
        for item in job_arguments_tmp:
            # Can't do a replace on items that are not string types!
            if isinstance(item, basestring):
                job_arguments.append(
                    item.replace('$JOB_ID', self.job_config.job_id))
            else:
                job_arguments.append(item)

        LOG.debug('Modified job arguments: %s' % job_arguments)

        jd = saga.job.Description()
        jd.environment = getattr(self.job_config, 'environment', {})
        jd.executable = getattr(self.job_config, 'executable', None)
        jd.arguments = job_arguments
        jd.working_directory = getattr(self.job_config, 'working_dir', None)
        jd.output = getattr(self.job_config, 'stdout', None)
        jd.error = getattr(self.job_config, 'stderr', None)
        jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)

        if not jd.output:
            jd.output = 'std.out'
        if not jd.error:
            jd.error = 'std.err'

        self.svc = saga.job.Service('ssh://%s/' % self.host,
                                    session=self.session)
        self.job = self.svc.create_job(jd)
        self.job.run()
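
Both `self.host` and `self.session` are set up elsewhere in the deployer. A sketch of how a SAGA session with an SSH context is typically constructed; the user name and key path are placeholders:

import saga

# Placeholder SSH credentials; substitute the real user and key path.
ctx = saga.Context('ssh')
ctx.user_id = 'jobuser'
ctx.user_key = '/home/jobuser/.ssh/id_rsa'

session = saga.Session()
session.add_context(ctx)

# The session is then handed to the job service, as in the code above:
#     svc = saga.job.Service('ssh://%s/' % host, session=session)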
    def run_job(self):
        JobDeploymentBase.run_job(self)
        # This function could use the libhpc resource daemon client to talk
        # to a resource daemon installed on cloud resources; at present it
        # uses SSH (via SAGA-Python) to run the job and monitor its state to
        # see when it is complete.
        # TODO: Should this be running/managing the job remotely via a SAGA
        # SSH session or should we be expecting to communicate with a remote
        # resource management service to handle this?
        LOG.debug('Run job...')

        job_arguments = getattr(self.job_config, 'args', [])
        input_files = getattr(self, 'transferred_input_files', [])
        job_arguments += input_files

        jd = Description()
        jd.environment = getattr(self.job_config, 'environment', {})
        if self.job_config.num_processes > 1:
            jd.executable = ('mpirun -np %s -machinefile /tmp/machinefile' %
                             (self.job_config.num_processes))
            executable = getattr(self.job_config, 'executable', None)
            if executable:
                job_arguments.insert(0, executable)
        else:
            jd.executable = getattr(self.job_config, 'executable', None)
        jd.arguments = job_arguments
        jd.working_directory = getattr(self.job_config, 'working_dir', None)
        jd.output = getattr(self.job_config, 'stdout', None)
        jd.error = getattr(self.job_config, 'stderr', None)
        jd.wall_time_limit = getattr(self.job_config, 'time_limit_mins', 0)
        #jd.number_of_processes = 4
        #jd.processes_per_host = 1
        #jd.total_physical_memory = "2400"

        if not jd.output:
            jd.output = 'std.out'
        if not jd.error:
            jd.error = 'std.err'

        self.svc = Service('ssh://%s/' %
                           self.running_nodes[0][0].public_ips[0],
                           session=self.session)
        self.job = self.svc.create_job(jd)
        self.job.run()
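
This variant references `Description` and `Service` unqualified, which implies `from saga.job import Description, Service` in the original module. A self-contained sketch tying submission, waiting and cleanup together; the endpoint and executable are illustrative only:

from saga.job import Description, Service

jd = Description()
jd.executable = '/bin/echo'   # placeholder executable
jd.arguments = ['hello']
jd.output = 'std.out'
jd.error = 'std.err'

svc = Service('ssh://localhost/')  # assumes key-based SSH to localhost
job = svc.create_job(jd)
job.run()
job.wait()
print('state: %s, exit code: %s' % (job.state, job.exit_code))
svc.close()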
Example #5
    def run_job(self, job_details=None):
        JobDeploymentBase.run_job(self)