Beispiel #1
0
    def generate_processing_script_container(self, processing):
        """Write the bash wrapper script for a (possibly dockerised) executable.

        Placeholders in ``self.executable`` and ``self.arguments`` are filled
        in through ``replace_parameters_with_values``.  When a sandbox is set
        and the executable string contains ``docker``, the arguments are
        prefixed with a ``run`` invocation that mounts the current directory
        and /cvmfs and forwards the X509 proxy location; the image is removed
        again once the command has run.

        :param processing: processing object, handed to ``get_long_id`` and
                           ``get_working_dir`` to locate the script.
        :returns: path of the generated script, after ``chmod +x``.
        """
        substitutions = {
            'MAX_POINTS': self.max_points,
            'NUM_POINTS': self.points_to_generate,
            'IN': self.input_json,
            'OUT': self.output_json,
        }
        # Default proxy file name used inside the container when no proxy
        # is exported in the environment.
        proxy_filename = 'x509up'
        proxy_path = os.environ.get('X509_USER_PROXY')
        if proxy_path:
            proxy_filename = os.path.basename(proxy_path)
            substitutions['X509_USER_PROXY_FULLNAME'] = proxy_path
            substitutions['X509_USER_PROXY_BASENAME'] = proxy_filename

        executable = replace_parameters_with_values(self.executable,
                                                    substitutions)
        arguments = replace_parameters_with_values(self.arguments,
                                                   substitutions)

        # Header: record the configuration and dump the runtime environment.
        parts = [
            "#!/bin/bash\n\n",
            "executable=%s\n" % str(executable),
            "arguments=%s\n" % str(arguments),
            "input_json=%s\n" % str(self.input_json),
            "output_json=%s\n" % str(self.output_json),
            "\n",
            "env\n",
            "echo $X509_USER_PROXY\n",
            "\n",
            "echo 'user id:'\n",
            "id\n",
            "\n",
        ]

        use_docker = bool(self.sandbox and 'docker' in executable)
        if use_docker:
            docker_prefix = 'run --rm -v $(pwd):%s -v /cvmfs:/cvmfs -e X509_USER_PROXY=%s/%s %s ' % (
                self.container_workdir, self.container_workdir,
                proxy_filename, self.sandbox)
            arguments = docker_prefix + arguments

        command = (str(executable), str(arguments))
        parts.append("echo '%s' '%s'\n" % command)
        parts.append('%s %s\n' % command)
        if use_docker:
            # Drop the image so it does not pile up on the worker node.
            parts.append('docker image rm -f %s\n' % self.sandbox)
        parts.append('\n')

        file_name = 'processing_%s.sh' % self.get_long_id(processing)
        script_name = os.path.join(self.get_working_dir(processing),
                                   file_name)
        with open(script_name, 'w') as handle:
            handle.write(''.join(parts))
        run_command("chmod +x %s" % script_name)
        return script_name
Beispiel #2
0
    def generate_processing_script_sandbox(self, processing):
        """Write the bash wrapper script that fetches a sandbox and runs it.

        The generated script downloads ``self.sandbox`` with wget, unpacks
        the tarball, makes the executable runnable and invokes it with the
        substituted arguments.

        :param processing: processing object, handed to ``get_long_id`` and
                           ``get_working_dir`` to locate the script.
        :returns: path of the generated script, after ``chmod +x``.
        """
        substitutions = {
            'MAX_POINTS': self.max_points,
            'NUM_POINTS': self.points_to_generate,
            'IN': self.input_json,
            'OUT': self.output_json,
        }
        proxy_path = os.environ.get('X509_USER_PROXY')
        if proxy_path:
            substitutions['X509_USER_PROXY_FULLNAME'] = proxy_path
            substitutions['X509_USER_PROXY_BASENAME'] = os.path.basename(
                proxy_path)

        executable = replace_parameters_with_values(self.executable,
                                                    substitutions)
        arguments = replace_parameters_with_values(self.arguments,
                                                   substitutions)
        exe_text = str(executable)
        args_text = str(arguments)

        parts = [
            "#!/bin/bash\n\n",
            # Configuration, recorded as shell variables.
            "sandbox=%s\n" % str(self.sandbox),
            "executable=%s\n" % exe_text,
            "arguments=%s\n" % args_text,
            "input_json=%s\n" % str(self.input_json),
            "output_json=%s\n" % str(self.output_json),
            "\n",
            # Diagnostics: environment and identity of the running user.
            "env\n",
            "echo $X509_USER_PROXY\n",
            "\n",
            "echo 'user id:'\n",
            "id\n",
            "\n",
            # Fetch and unpack the sandbox tarball.
            "wget $sandbox\n",
            'base_sandbox="$(basename -- $sandbox)"\n',
            'tar xzf $base_sandbox\n',
            # Run the payload.
            'chmod +x %s\n' % exe_text,
            "echo '%s' '%s'\n" % (exe_text, args_text),
            '%s %s\n' % (exe_text, args_text),
            '\n',
        ]

        file_name = 'processing_%s.sh' % self.get_long_id(processing)
        script_name = os.path.join(self.get_working_dir(processing),
                                   file_name)
        with open(script_name, 'w') as handle:
            handle.write(''.join(parts))
        run_command("chmod +x %s" % script_name)
        return script_name
Beispiel #3
0
    def poll_job_status(self, processing_id, job_id):
        """Look up a condor job and translate its JobStatus.

        ``condor_q`` is queried first; when it succeeds but prints nothing,
        ``condor_history`` is queried with the same format.  Every output
        line is expected to hold three space-separated fields:
        ClusterId, Processing_id and JobStatus.

        :param processing_id: id the job is expected to carry in its
                              ``Processing_id`` attribute (compared after
                              converting the reported value with ``int``).
        :param job_id: condor cluster id to look for.
        :returns: ``(final_job_status, ret_err)``.  ``ret_err`` is None
                  unless the reported Processing_id differs from
                  ``processing_id``.  The status is
                  ``ProcessingStatus.Submitted`` when the command fails or
                  when no record for ``job_id`` is found.
        """
        # HTCondor JobStatus codes:
        # 0 Unexpanded     U
        # 1 Idle           I
        # 2 Running        R
        # 3 Removed        X
        # 4 Completed      C
        # 5 Held           H
        # 6 Submission_err E
        cmd = "condor_q -format '%s' ClusterId  -format ' %s' Processing_id -format ' %s' JobStatus " + str(
            job_id)
        status, output, error = run_command(cmd)
        self.logger.debug("poll job status: %s" % cmd)
        self.logger.debug("status: %s, output: %s, error: %s" %
                          (status, output, error))
        if status == 0 and len(output) == 0:
            # Not in the queue: look in the history instead.
            cmd = "condor_history -format '%s' ClusterId  -format ' %s' Processing_id -format ' %s' JobStatus " + str(
                job_id)
            status, output, error = run_command(cmd)
            self.logger.debug("poll job status: %s" % cmd)
            self.logger.debug("status: %s, output: %s, error: %s" %
                              (status, output, error))

        ret_err = None
        # Default covers both a failed command and "no matching record";
        # previously the latter left the variable unbound.
        final_job_status = ProcessingStatus.Submitted
        if status == 0:
            for line in output.split('\n'):
                fields = line.split()
                if len(fields) != 3:
                    # Blank lines (empty output, trailing newline) and
                    # malformed records cannot be unpacked; skip them.
                    if line.strip():
                        self.logger.debug(
                            "skip unparsable condor record: %s" % line)
                    continue
                c_job_id, c_processing_id, c_job_status = fields
                if str(c_job_id) != str(job_id):
                    continue

                c_processing_id = int(c_processing_id)
                job_status = int(c_job_status)
                if c_processing_id != processing_id:
                    final_job_status = ProcessingStatus.Failed
                    ret_err = 'jobid and the processing_id mismatched'
                elif job_status < 2:
                    final_job_status = ProcessingStatus.Submitted
                elif job_status == 2:
                    final_job_status = ProcessingStatus.Running
                elif job_status == 3:
                    final_job_status = ProcessingStatus.Cancel
                elif job_status == 4:
                    final_job_status = ProcessingStatus.Finished
                else:
                    final_job_status = ProcessingStatus.Failed
        return final_job_status, ret_err
def submit_processing_task():
    """Submit the active-learning test task through ``prun``.

    The output dataset name is made unique with the current epoch second.

    :returns: the value ``get_task_id`` extracts from the command's
              stdout and stderr.
    :raises Exception: carrying stdout + stderr when the command exits
                       with a non-zero status.
    """
    out_ds = "user.wguan.altest%s" % str(int(time.time()))
    cmd = ("cd /afs/cern.ch/user/w/wguan/workdisk/iDDS/main/lib/idds/tests/activelearning_test_codes; "
           "prun --exec 'python simplescript.py 0.5 0.5 200 output.json' "
           "--outDS %s  --outputs output.json --nJobs=10") % out_ds
    status, output, error = run_command(cmd)

    # Sample result of a successful submission (status 0, empty stdout,
    # everything reported on stderr):
    #   INFO : gathering files under /afs/cern.ch/user/w/wguan/workdisk/iDDS/main/lib/idds/tests/activelearning_test_codes
    #   INFO : upload source files
    #   INFO : submit user.wguan.altest1234/
    #   INFO : succeeded. new jediTaskID=23752996
    if status != 0:
        raise Exception(output + error)
    return get_task_id(output, error)
Beispiel #5
0
    def generate_processing_script_nevergrad(self, processing):
        """Write the bash wrapper script for the configured nevergrad agent.

        Executable and argument template are read from
        ``self.agent_attributes['nevergrad']``; only the base name of the
        executable is used in the script.

        :param processing: processing object, handed to ``get_long_id`` and
                           ``get_working_dir`` to locate the script.
        :returns: path of the generated script, after ``chmod +x``.
        """
        agent_conf = self.agent_attributes['nevergrad']
        exe_name = os.path.basename(agent_conf['executable'])
        arguments = self.agent_attributes['nevergrad']['arguments']

        substitutions = {
            'MAX_POINTS': self.max_points,
            'NUM_POINTS': self.points_to_generate,
            'IN': self.input_json,
            'OUT': self.output_json,
        }
        proxy_path = os.environ.get('X509_USER_PROXY')
        if proxy_path:
            substitutions['X509_USER_PROXY_FULLNAME'] = proxy_path
            substitutions['X509_USER_PROXY_BASENAME'] = os.path.basename(
                proxy_path)

        args_text = str(replace_parameters_with_values(arguments,
                                                       substitutions))

        parts = [
            "#!/bin/bash\n\n",
            # Configuration, recorded as shell variables.
            "executable=%s\n" % exe_name,
            "arguments='%s'\n" % args_text,
            "input_json=%s\n" % str(self.input_json),
            "output_json=%s\n" % str(self.output_json),
            "\n",
            # Diagnostics: environment and identity of the running user.
            "env\n",
            "echo $X509_USER_PROXY\n",
            "\n",
            "echo 'user id:'\n",
            "id\n",
            "\n",
            # Run the payload.
            "echo '%s' '%s'\n" % (exe_name, args_text),
            '%s %s\n' % (exe_name, args_text),
            '\n',
        ]

        file_name = 'processing_%s.sh' % self.get_long_id(processing)
        script_name = os.path.join(self.get_working_dir(processing),
                                   file_name)
        with open(script_name, 'w') as handle:
            handle.write(''.join(parts))
        run_command("chmod +x %s" % script_name)
        return script_name
Beispiel #6
0
    def generate_processing_script_sandbox(self, processing):
        """Write the bash wrapper script that fetches a sandbox and its input.

        The generated script starts with the text returned by
        ``get_rucio_setup_env``, downloads and unpacks ``self.sandbox``,
        downloads the primary input collection with ``rucio download`` and
        then runs ``self.executable`` with the parsed arguments.

        :param processing: processing object, handed to ``get_long_id`` and
                           ``get_working_dir`` to locate the script.
        :returns: path of the generated script, after ``chmod +x``.
        """
        args_text = str(self.parse_arguments())

        parts = ["#!/bin/bash\n\n", self.get_rucio_setup_env(), "\n"]

        # Configuration, recorded as shell variables.
        parts.extend([
            "sandbox=%s\n" % str(self.sandbox),
            "executable=%s\n" % str(self.executable),
            "arguments=%s\n" % args_text,
            "output_json=%s\n" % str(self.output_json),
            "\n",
        ])

        # Diagnostics: environment and identity of the running user.
        parts.extend([
            "env\n",
            "echo $X509_USER_PROXY\n",
            "\n",
            "echo 'user id:'\n",
            "id\n",
            "\n",
        ])

        # Fetch and unpack the sandbox tarball.
        parts.extend([
            "wget $sandbox\n",
            'base_sandbox="$(basename -- $sandbox)"\n',
            'tar xzf $base_sandbox\n',
        ])

        # Fetch the input dataset, then run the payload.
        dataset = self.collections[self._primary_input_collection]
        parts.extend([
            'rucio download %s:%s\n' % (dataset['scope'], dataset['name']),
            'chmod +x %s\n' % str(self.executable),
            "echo '%s' '%s'\n" % (str(self.executable), args_text),
            '%s %s\n' % (str(self.executable), args_text),
            'ls\n\n',
        ])

        file_name = 'processing_%s.sh' % self.get_long_id(processing)
        script_name = os.path.join(self.get_working_dir(processing),
                                   file_name)
        with open(script_name, 'w') as handle:
            handle.write(''.join(parts))
        run_command("chmod +x %s" % script_name)
        return script_name
Beispiel #7
0
 def submit_job(self, processing_id, sandbox, executable, arguments, input_list, input_json, output_json, should_transfer_executable=False):
     """Generate a submit file and hand it to ``condor_submit``.

     :returns: ``(jobid, None)`` when the command exits with status 0 and
               prints a 'submitted to cluster' line; the job id is the last
               space-separated token of that line with dots removed.
               Otherwise ``(None, output + error)``.
     """
     jdl_file = self.generate_submit_file(
         processing_id, sandbox, executable, arguments, input_list,
         input_json, output_json,
         should_transfer_executable=should_transfer_executable)
     cmd = "condor_submit " + jdl_file
     status, output, error = run_command(cmd)
     self.logger.info("submiting the job to cluster: %s" % cmd)
     self.logger.info("status: %s, output: %s, error: %s " % (status, output, error))

     marker = 'submitted to cluster'
     succeeded = status == 0 or str(status) == '0'
     if succeeded and output and marker in output:
         for row in output.split('\n'):
             if marker not in row:
                 continue
             # First matching line wins.
             return row.split(' ')[-1].replace('.', ''), None
     return None, output + error
Beispiel #8
0
    def submit_condor_processing(self, processing):
        """Submit ``processing`` to condor through a generated submit file.

        :param processing: processing object, handed to
                           ``generate_processing_submit_file``.
        :returns: ``(None, err_msg)`` when no submit file could be
                  generated; ``(jobid, None)`` when ``condor_submit`` exits
                  with status 0 and prints a 'submitted to cluster' line
                  (job id = last space-separated token of that line, dots
                  removed); otherwise ``(None, output + error)``.
        """
        jdl_file, err_msg = self.generate_processing_submit_file(processing)
        if not jdl_file:
            return None, err_msg

        submit_cmd = "condor_submit " + jdl_file
        status, output, error = run_command(submit_cmd)
        self.logger.info("submiting the job to cluster: %s" % submit_cmd)
        self.logger.info("status: %s, output: %s, error: %s " %
                         (status, output, error))

        token = 'submitted to cluster'
        exit_ok = status == 0 or str(status) == '0'
        if exit_ok and output and token in output:
            hits = [row for row in output.split('\n') if token in row]
            if hits:
                # First matching line wins.
                return hits[0].split(' ')[-1].replace('.', ''), None
        return None, output + error
Beispiel #9
0
    def poll_condor_job_status(self, processing, job_id):
        """Look up a condor job, translate its JobStatus and collect errors.

        ``condor_q`` is queried first; when it succeeds but prints nothing,
        ``condor_history`` is queried with the same format.  Every output
        line is expected to hold six space-separated fields: ClusterId,
        Processing_id, JobStatus, Iwd, Cmd and Err.

        :param processing: processing object; its long id (from
                           ``get_long_id``) must equal the reported
                           Processing_id.
        :param job_id: condor cluster id to look for.
        :returns: ``(final_job_status, ret_err)``.  ``ret_err`` is a string
                  built from the mismatch message, the command's stderr and,
                  for failed jobs, the messages returned by
                  ``get_job_err_message`` for the Cmd and Err fields.  The
                  status is ``ProcessingStatus.Submitted`` when the command
                  fails or when no record for ``job_id`` is found.
        """
        # HTCondor JobStatus codes:
        # 0 Unexpanded     U
        # 1 Idle           I
        # 2 Running        R
        # 3 Removed        X
        # 4 Completed      C
        # 5 Held           H
        # 6 Submission_err E
        cmd = "condor_q -format '%s' ClusterId  -format ' %s' Processing_id -format ' %s' JobStatus -format ' %s' Iwd -format ' %s' Cmd -format ' %s' Err " + str(
            job_id)
        status, output, error = run_command(cmd)
        self.logger.info("poll job status: %s" % cmd)
        self.logger.info("status: %s, output: %s, error: %s" %
                         (status, output, error))
        if status == 0 and len(output) == 0:
            # Not in the queue: look in the history instead.
            cmd = "condor_history -format '%s' ClusterId  -format ' %s' Processing_id -format ' %s' JobStatus -format ' %s' Iwd -format ' %s' Cmd -format ' %s' Err " + str(
                job_id)
            status, output, error = run_command(cmd)
            self.logger.info("poll job status: %s" % cmd)
            self.logger.info("status: %s, output: %s, error: %s" %
                             (status, output, error))

        ret_err = ''
        job_cmd_msg, job_err_msg = '', ''
        # Default covers both a failed command and "no matching record";
        # previously the latter left the variable unbound.
        final_job_status = ProcessingStatus.Submitted
        if status == 0:
            for line in output.split('\n'):
                fields = line.split()
                if len(fields) != 6:
                    # Blank lines (empty output, trailing newline) and
                    # malformed records cannot be unpacked; skip them.
                    if line.strip():
                        self.logger.info(
                            "skip unparsable condor record: %s" % line)
                    continue
                (c_job_id, c_processing_id, c_job_status, job_workdir,
                 job_cmd, job_err) = fields
                if str(c_job_id) != str(job_id):
                    continue

                processing_id = self.get_long_id(processing)
                job_status = int(c_job_status)
                if c_processing_id != processing_id:
                    final_job_status = ProcessingStatus.Failed
                    ret_err = 'jobid and the processing_id mismatched'
                else:
                    if job_status < 2:
                        final_job_status = ProcessingStatus.Submitted
                    elif job_status == 2:
                        # A duplicated "== 2" branch used to shadow this
                        # one, so running jobs were reported as Submitted.
                        final_job_status = ProcessingStatus.Running
                    elif job_status == 3:
                        final_job_status = ProcessingStatus.Cancelled
                    elif job_status == 4:
                        final_job_status = ProcessingStatus.Finished
                    else:
                        final_job_status = ProcessingStatus.Failed

                    if final_job_status in [ProcessingStatus.Failed]:
                        job_cmd_msg = self.get_job_err_message(
                            job_workdir, job_cmd)
                        # Only the tail of the command message is kept.
                        job_cmd_msg = job_cmd_msg[-500:]
                        job_err_msg = self.get_job_err_message(
                            job_workdir, job_err)

        if error:
            ret_err += error
        if job_cmd_msg:
            ret_err += "Command output: " + job_cmd_msg
        if job_err_msg:
            ret_err += "Stderr: " + job_err_msg

        return final_job_status, ret_err
Beispiel #10
0
    def poll_job_status(self, processing_id, job_id):
        """Look up a condor job, translate its JobStatus and read its logs.

        ``condor_q`` is queried first; when it succeeds but prints nothing,
        ``condor_history`` is queried with the same format.  Every output
        line is expected to hold five space-separated fields: ClusterId,
        Processing_id, JobStatus, Out and Err.

        :param processing_id: id the job is expected to carry in its
                              ``Processing_id`` attribute (compared after
                              converting the reported value with ``int``).
        :param job_id: condor cluster id to look for.
        :returns: ``(final_job_status, ret_err, out_msg, err_msg)``.
                  ``ret_err`` is None unless the reported Processing_id
                  differs from ``processing_id``.  ``out_msg`` / ``err_msg``
                  hold the contents of the matched job's Out / Err files
                  when the job is cancelled, finished or failed and the
                  file exists; otherwise None.  The status is
                  ``ProcessingStatus.Submitted`` when the command fails or
                  when no record for ``job_id`` is found.
        """
        # HTCondor JobStatus codes:
        # 0 Unexpanded     U
        # 1 Idle           I
        # 2 Running        R
        # 3 Removed        X
        # 4 Completed      C
        # 5 Held           H
        # 6 Submission_err E
        cmd = "condor_q -format '%s' ClusterId  -format ' %s' Processing_id -format ' %s' JobStatus -format ' %s' Out -format ' %s' Err " + str(
            job_id)
        status, output, error = run_command(cmd)
        self.logger.info("poll job status: %s" % cmd)
        self.logger.info("status: %s, output: %s, error: %s" %
                         (status, output, error))
        if status == 0 and len(output) == 0:
            # Not in the queue: look in the history instead.
            cmd = "condor_history -format '%s' ClusterId  -format ' %s' Processing_id -format ' %s' JobStatus -format ' %s' Out -format ' %s' Err " + str(
                job_id)
            status, output, error = run_command(cmd)
            self.logger.info("poll job status: %s" % cmd)
            self.logger.info("status: %s, output: %s, error: %s" %
                             (status, output, error))

        ret_err = None
        # Default covers both a failed command and "no matching record";
        # previously the latter left the variable unbound.
        final_job_status = ProcessingStatus.Submitted
        # Log files of the record that matched job_id.  Kept separately so
        # a later, non-matching line cannot replace them.
        job_out_file, job_err_file = None, None
        if status == 0:
            for line in output.split('\n'):
                fields = line.split()
                if len(fields) != 5:
                    # Blank lines (empty output, trailing newline) and
                    # malformed records cannot be unpacked; skip them.
                    if line.strip():
                        self.logger.info(
                            "skip unparsable condor record: %s" % line)
                    continue
                (c_job_id, c_processing_id, c_job_status, c_job_out_file,
                 c_job_err_file) = fields
                if str(c_job_id) != str(job_id):
                    continue

                job_out_file, job_err_file = c_job_out_file, c_job_err_file
                c_processing_id = int(c_processing_id)
                job_status = int(c_job_status)
                if c_processing_id != processing_id:
                    final_job_status = ProcessingStatus.Failed
                    ret_err = 'jobid and the processing_id mismatched'
                elif job_status < 2:
                    final_job_status = ProcessingStatus.Submitted
                elif job_status == 2:
                    final_job_status = ProcessingStatus.Running
                elif job_status == 3:
                    final_job_status = ProcessingStatus.Cancel
                elif job_status == 4:
                    final_job_status = ProcessingStatus.Finished
                else:
                    final_job_status = ProcessingStatus.Failed

        out_msg, err_msg = None, None
        if final_job_status in [
                ProcessingStatus.Cancel, ProcessingStatus.Finished,
                ProcessingStatus.Failed
        ]:
            if job_out_file and os.path.exists(job_out_file):
                with open(job_out_file) as f:
                    out_msg = f.read()
            if job_err_file and os.path.exists(job_err_file):
                with open(job_err_file) as f:
                    err_msg = f.read()
        return final_job_status, ret_err, out_msg, err_msg