Ejemplo n.º 1
0
    def test_submission(self):
        """Run the ``job submit`` command line and assert it exits with status 0.

        The command line is built by ``clg.CommandlineGenerator`` from the
        ``'job submit'`` binary, ``self.options`` and the parameter list
        ``['dir']``.

        Raises:
            subprocess.TimeoutExpired: if the command has not finished within
                30 seconds (the child is killed and reaped before re-raising).
            AssertionError: if the command exits with a non-zero status; the
                message is the command line that was run.
        """
        jobsubmit_bin = 'job submit'
        jobsubmit_command = clg.CommandlineGenerator(jobsubmit_bin,
                                                     self.options, ['dir'])
        hpc_command_line = jobsubmit_command.Commandline

        p = subprocess.Popen(hpc_command_line,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        try:
            # communicate() closes stdin and drains stdout/stderr while
            # waiting; a bare wait() can deadlock once the child fills a
            # pipe buffer that nobody is reading.
            p.communicate(timeout=30)
        except subprocess.TimeoutExpired:
            # Do not leave a runaway child behind: kill it, reap it, and let
            # the original timeout propagate to the test runner.
            p.kill()
            p.communicate()
            raise

        # Compare by value; `is 0` only works through CPython's small-int
        # caching and triggers a SyntaxWarning on Python 3.8+.
        assert p.returncode == 0, hpc_command_line
Ejemplo n.º 2
0
    def run(self):
        """Submit this scenario as an HPC job, wait for it, and verify the output.

        Steps, in order:

        1. Acquire ``self.__class__.sems`` (presumably a semaphore that limits
           how many jobs run at once -- confirm against the class definition).
        2. Build a nested command line: the simulation binary
           (``self.config_json['bin_path']``) wrapped in ``mpiexec`` wrapped in
           ``job submit``.
        3. Run the submit command through the shell, retrying until a job id
           can be parsed from its stdout. The retry loop only gives up (after
           ``num_retries`` reaches 5) when ``self.params.hide_graphs`` is
           truthy; in that case the test is reported as erroring and the
           method returns.
        4. Poll ``job view`` every 5 seconds until the job's ``State`` line is
           ``Failed``, ``Canceled``, ``Completed`` or ``Finished``.
        5. On completion, read ``status.txt`` from the simulation directory to
           set ``self.duration`` and run the verification that matches
           ``self.scenario_type``.
        6. Release ``self.__class__.sems``.

        NOTE(review): the early ``return`` in step 3 leaves ``sems`` acquired;
        nothing in this method releases it on that path.
        """

        self.__class__.sems.acquire()

        def get_num_cores(some_json):
            """Return ``some_json['parameters']['Num_Cores']`` as an int, or 1 if absent."""
            num_cores = 1
            if ('parameters' in some_json) and ('Num_Cores'
                                                in some_json['parameters']):
                num_cores = some_json['parameters']['Num_Cores']
            else:
                print(
                    "Didn't find key 'parameters/Num_Cores' in '{0}'. Using 1."
                    .format(self.scenario_path))

            return int(num_cores)

        # Paths are assembled with literal backslashes because the job runs on
        # a Windows HPC cluster regardless of the submitting host's OS.
        input_dir = self.params.input_root + self.config_json["parameters"][
            "Geography"] + "\\"
        sim_dir = self.sim_root + "\\" + self.sim_timestamp  # can't use os.path.join() here because on linux it'll give us the wrong dir-separator...
        if self.suffix is not None:
            job_name = self.config_json["parameters"]["Config_Name"].replace(
                ' ', '_') + "_" + self.suffix + "_(" + self.sim_timestamp + ")"
        else:
            job_name = self.config_json["parameters"]["Config_Name"].replace(
                ' ', '_') + "_(" + self.sim_timestamp + ")"
        # Truncate the job name to 79 characters (presumably a scheduler
        # limit -- TODO confirm).
        job_name = job_name[:79]

        numcores = get_num_cores(self.config_json)

        # Default resource request: a plain core count with no mpiexec core option.
        hpc_resource_option = '/numcores:'
        hpc_resource_count = str(numcores)
        mpi_core_option = None
        mpi_core_count = ''

        # When measuring performance, request whole nodes or a whole socket
        # instead of loose cores, and tell mpiexec how many cores to use ('-c').
        if self.params.measure_perf:
            if numcores % self.params.cores_per_node == 0:
                hpc_resource_option = '/numnodes:'
                # NOTE(review): under Python 3 `/` is true division, so this
                # produces strings such as '2.0' rather than '2'.
                hpc_resource_count = str(numcores / self.params.cores_per_node)
                mpi_core_option = '-c'
                mpi_core_count = str(self.params.cores_per_node)
            elif numcores == self.params.cores_per_socket:
                hpc_resource_option = '/numsockets:'
                hpc_resource_count = '1'
                mpi_core_option = '-c'
                mpi_core_count = str(self.params.cores_per_socket)
            # "bail" here, we don't have a multiple of cores per node nor can we fit on a single socket
            # (No code actually bails: the '/numcores:' defaults set above are used.)

        #eradication.exe commandline
        eradication_bin = self.config_json['bin_path']
        eradication_options = {
            '--config': 'config.json',
            '--input-path': input_dir,
            '--progress': ' '
        }

        # python-script-path is optional parameter.
        if "PSP" in self.config_json:
            eradication_options["--python-script-path"] = self.config_json[
                "PSP"]
        #if params.dll_root is not None and params.use_dlls is True:
        #    eradication_options['--dll-path'] = params.dll_root
        eradication_params = []
        eradication_command = clg.CommandlineGenerator(eradication_bin,
                                                       eradication_options,
                                                       eradication_params)

        #mpiexec commandline
        # The whole simulation command line becomes mpiexec's single parameter.
        mpi_bin = 'mpiexec'
        mpi_options = {}
        if mpi_core_option is not None:
            mpi_options[mpi_core_option] = mpi_core_count
        mpi_params = [eradication_command.Commandline]
        mpi_command = clg.CommandlineGenerator(mpi_bin, mpi_options,
                                               mpi_params)

        #job submit commandline
        # The mpiexec command line in turn becomes the parameter of `job submit`.
        jobsubmit_bin = 'job submit'
        jobsubmit_options = {}
        jobsubmit_options['/workdir:'] = sim_dir
        jobsubmit_options['/scheduler:'] = self.params.hpc_head_node
        jobsubmit_options['/nodegroup:'] = self.params.hpc_node_group
        # NOTE(review): the next two lines appear to have been mangled by
        # credential redaction (the '******' runs); as written the first one
        # is not valid Python. The original presumably set '/user:' (and
        # optionally '/password:') and stored job_name under '/jobname:' --
        # restore them from version control before running this code.
        jobsubmit_options['/user:'******'':
        #jobsubmit_options['/password:'******'/jobname:'] = job_name
        jobsubmit_options[hpc_resource_option] = hpc_resource_count
        if self.params.measure_perf:
            jobsubmit_options['/exclusive'] = ' '
        # Only 'tests' scenarios let the scheduler write StdOut.txt; every other
        # scenario type gets a shell-level redirect appended further below.
        if self.scenario_type == 'tests':
            jobsubmit_options['/stdout:'] = 'StdOut.txt'
        else:
            print("Going to redirect stdout, not using parameter.")
            #jobsubmit_options['/stdout:'] = 'Test.txt'
        jobsubmit_options['/stderr:'] = 'StdErr.txt'
        jobsubmit_options['/priority:'] = 'Lowest'
        jobsubmit_params = [mpi_command.Commandline]
        jobsubmit_command = clg.CommandlineGenerator(jobsubmit_bin,
                                                     jobsubmit_options,
                                                     jobsubmit_params)

        #print 'simulation command line:', eradication_command.Commandline
        #print 'mpiexec command line:   ', mpi_command.Commandline
        #print 'job submit command line:', jobsubmit_command.Commandline

        hpc_command_line = jobsubmit_command.Commandline
        if self.scenario_type != 'tests':
            # '^>' is a caret-escaped '>' for the Windows shell, so the
            # redirect is passed along as part of the submitted command
            # instead of being applied to `job submit` itself (presumably so
            # the job writes Test.txt in its work dir -- TODO confirm).
            hpc_command_line = hpc_command_line + " ^> Test.txt"

        # -1 is the "no job id yet" sentinel that drives the retry loop below.
        job_id = -1
        num_retries = -1

        while job_id == -1:
            num_retries += 1
            #print "executing hpc_command_line: " + hpc_command_line + "\n"

            #p = subprocess.Popen( hpc_command_line.split(), shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE )
            #p = subprocess.Popen( hpc_command_line.split(), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE )
            p = subprocess.Popen(hpc_command_line,
                                 shell=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            [hpc_pipe_stdout, hpc_pipe_stderr] = p.communicate()
            #print "Trying to read hpc response..."
            #print hpc_pipe_stdout
            line = hpc_pipe_stdout

            if p.returncode == 0:
                # The job id is taken to be the last space-separated token of
                # stdout, with surrounding whitespace and a trailing '.' removed.
                # NOTE(review): under Python 3 `line` is bytes (no text mode is
                # requested above), so split(' ') would raise TypeError; and
                # `job_id > 0` compares a str with an int, which also raises
                # TypeError on Python 3. This looks like Python 2 era logic.
                job_id = line.split(' ')[-1].strip().rstrip('.')
                if str.isdigit(job_id) and job_id > 0:
                    print(self.scenario_path + " submitted (as job_id " +
                          str(job_id) + ")\n")
                else:
                    print(
                        "ERROR: What happened here?  Please send this to Jeff:\n"
                    )
                    print(hpc_pipe_stdout)
                    print(hpc_pipe_stderr)
                    # Reset the sentinel so the loop retries the submission.
                    job_id = -1
            else:
                print("ERROR: job submit of " + self.scenario_path +
                      " failed!")
                print(hpc_pipe_stdout)
                print(hpc_pipe_stderr)

            # Give up only when hide_graphs is set; otherwise the loop keeps
            # retrying indefinitely.
            if job_id == -1 and num_retries >= 5 and self.params.hide_graphs:
                print("Job submission failed multiple times for " +
                      self.scenario_path +
                      ".  Aborting this test and logging error.")
                self.report.addErroringTest(self.scenario_path, "", sim_dir,
                                            self.scenario_type)
                # NOTE(review): returns without calling sems.release().
                return

        monitor_cmd_line = "job view /scheduler:" + self.params.hpc_head_node + " " + str(
            job_id)

        # Poll the scheduler until the job reaches a terminal state.
        check_status = True
        while check_status:
            #print "executing hpc_command_line: " + monitor_cmd_line
            #print "Checking status of job " + str(job_id)
            #hpc_pipe = os.popen( monitor_cmd_line )
            # stderr is not piped here, so hpc_pipe_stderr below is always None.
            hpc_pipe = subprocess.Popen(monitor_cmd_line.split(),
                                        shell=False,
                                        stdout=subprocess.PIPE)
            [hpc_pipe_stdout, hpc_pipe_stderr] = hpc_pipe.communicate()
            lines = hpc_pipe_stdout
            #for line in hpc_pipe.readlines():
            #print lines
            for line in lines.split('\n'):
                res = line.split(':')
                #print "DEBUG: " + str(res[0])
                if res[0].strip() == "State":
                    state = res[1].strip()
                    if state == "Failed":
                        self.__class__.completed = self.__class__.completed + 1
                        print(self.scenario_path + " FAILED!")
                        check_status = False
                        self.report.addErroringTest(self.scenario_path, "",
                                                    sim_dir,
                                                    self.scenario_type)
                        #self.finish(sim_dir, False)
                    # Unlike "Failed", a canceled job is not added to the report.
                    if state == "Canceled":
                        self.__class__.completed = self.__class__.completed + 1
                        print("Canceled!")
                        check_status = False
                        #self.finish(sim_dir, False)
                    elif state == "Completed" or state == "Finished":
                        self.__class__.completed = self.__class__.completed + 1
                        print(
                            str(self.__class__.completed) + " out of " +
                            str(len(ru.reg_threads)) + " completed.")
                        check_status = False

                        # Parse the first "Done" line of status.txt: the text
                        # after the first '-' is read as H:M:S and stored as
                        # self.duration.
                        # NOTE(review): the file handle is never closed.
                        status_file = open(os.path.join(sim_dir, "status.txt"))
                        for status_line in status_file.readlines():
                            if status_line.startswith("Done"):
                                time_split = status_line.split(
                                    '-')[1].strip().split(':')
                                self.duration = datetime.timedelta(
                                    hours=int(time_split[0]),
                                    minutes=int(time_split[1]),
                                    seconds=int(time_split[2]))
                                break

                        if self.scenario_type == 'tests':
                            if self.params.all_outputs == False:
                                # Following line is for InsetChart.json only
                                self.verify(sim_dir)
                            else:
                                # Every .json file in output (not hidden with . prefix) will be used for validation
                                # The filter below also accepts .csv, .kml, .bin,
                                # .h5 and .db files, and skips "transitions.json"
                                # and any name containing "linux".
                                for file in os.listdir(
                                        os.path.join(self.scenario_path,
                                                     "output")):
                                    if (file.endswith(".json")
                                            or file.endswith(".csv")
                                            or file.endswith(".kml")
                                            or file.endswith(".bin")
                                            or file.endswith(".h5")
                                            or file.endswith(".db")
                                        ) and file[
                                            0] != "." and file != "transitions.json" and "linux" not in file:
                                        self.verify(sim_dir, file, "Channels")
                        elif self.scenario_type == 'science':  # self.report <> None:
                            self.science_verify(sim_dir)

                    # Only the first "State" line of the output is examined.
                    break
            # Sleeps after every poll, including the one that saw a terminal state.
            time.sleep(5)
        self.__class__.sems.release()