Exemple #1
0
 def cat_job(self, jobids, jobinfo, print_stderr=None, store=False):
     """ Print (or collect) the standard output of a given SLURM job.

     Parameters:
         jobids: length-1 list holding the SLURM job id.
         jobinfo: dict with at least "runcard", "runfolder", "jobtype"
             and (for Production/Socket runs) "no_runs".
         print_stderr: if truthy, show the .err file instead of .out.
         store: if True, return the captured output instead of printing.

     Returns the list of captured outputs when store is True, else None.
     """
     dir_name = self.get_stdout_dir_name(
         self.get_local_dir_name(jobinfo["runcard"], jobinfo["runfolder"]))
     # jobids = length 1 for SLURM jobs - just take the only element here
     jobid = jobids[0]
     output = []

     def _cat_file(stdoutfile):
         # Shared per-file logic (was duplicated in both branches):
         # optionally switch to the stderr file, then print or capture it.
         if print_stderr:
             stdoutfile = stdoutfile.replace(".out", ".err")
         cmd = ["cat", stdoutfile]
         if not store:
             util.spCall(cmd)
         else:
             output.append(
                 util.getOutputCall(cmd,
                                    suppress_errors=True,
                                    include_return_code=False))

     if jobinfo["jobtype"] == "Production" or "Socket" in jobinfo["jobtype"]:
         # Production/socket runs write one stdout file per subjob
         for subjobno in range(1, int(jobinfo["no_runs"]) + 1):
             _cat_file(os.path.join(
                 dir_name, "slurm-{0}_{1}.out".format(jobid, subjobno)))
     else:
         _cat_file(os.path.join(dir_name, F"slurm-{jobid}.out"))
     if store:
         return output
Exemple #2
0
 def cat_log_job(self, jobids, jobinfo):
     """Sometimes the std output doesn't get updated
     but we can choose to access the logs themselves"""
     dest = ["file:///tmp/"]
     copy_base = ["arccp", "-i"]
     cat_prefix = "cat /tmp/"
     for jobid in jobids:
         # List the job's remote files and keep only the .log ones
         listing = util.getOutputCall(["arcls", jobid]).split()
         for logfile in (name for name in listing if name.endswith(".log")):
             copy_cmd = copy_base + [os.path.join(jobid, logfile)] + dest
             for token in util.getOutputCall(copy_cmd).split():
                 # arccp echoes the local path of the copied log; cat it
                 if ".log" in token:
                     util.spCall((cat_prefix + token).split())
Exemple #3
0
 def _do_stats_job(self, jobid_raw):
     """ version of stats job multithread ready
     """
     # Tuple input carries (jobid, cached_status); terminal cached
     # statuses are returned without querying the batch system again.
     if isinstance(jobid_raw, tuple):
         if jobid_raw[1] in (self.cDONE, self.cFAIL, self.cMISS):
             return jobid_raw[1]
         jobid = jobid_raw[0]
     else:
         jobid = jobid_raw
     cmd = [self.cmd_stat, jobid.strip(), "-j", header.arcbase]
     status_text = util.getOutputCall(cmd,
                                      suppress_errors=True,
                                      include_return_code=False)
     if "Done" in status_text or "Finished" in status_text:
         return self.cDONE
     if "Waiting" in status_text or "Queuing" in status_text:
         return self.cWAIT
     if "Running" in status_text:
         return self.cRUN
     if "Failed" in status_text:
         # if we still have a return code 0 something is odd
         if "Exit Code: 0" in status_text:
             return self.cMISS
         return self.cFAIL
     return self.cUNK
Exemple #4
0
 def _run_JDL(self, filename):
     """ Sends JDL file to the dirac
     management system
     """
     submit_cmd = "dirac-wms-job-submit {}".format(filename)
     raw = util.getOutputCall(submit_cmd.split(), include_return_code=False)
     # Job ids come back bracketed, e.g. "... [id1, id2]"; pull out the
     # text between the first "[" and "]" and split on ", "
     bracketed = raw.rstrip().strip().split("]")[0].split("[")[-1]
     return bracketed.split(", ")
Exemple #5
0
 def get_status(self, jobid, status):
     """ Return the number of subjobs of `jobid` currently in `status`.

     Counts the non-error lines of `squeue -r` output, minus the header.
     """
     # include_return_code=False added for consistency with the other
     # version of this method, so only the command output is counted.
     stat = len([
         i for i in util.getOutputCall(
             ["squeue", "-j{0}".format(jobid), "-r", "-t", status],
             suppress_errors=True,
             include_return_code=False).split("\n")[1:] if "error" not in i
     ])  # strip header from results
     if stat > 0:
         stat = stat - 1
     return stat
Exemple #6
0
 def get_status(self, jobid, status):
     """Return how many subjobs of `jobid` are currently in `status`."""
     squeue_out = util.getOutputCall(
         ["squeue", F"-j{jobid}", "-r", "-t", status],
         suppress_errors=True,
         include_return_code=False)
     # Drop the squeue header line and ignore any error lines
     matching = [line for line in squeue_out.split("\n")[1:]
                 if "error" not in line]
     # Equivalent to "subtract one only when non-zero"
     return max(len(matching) - 1, 0)
Exemple #7
0
 def get_status(self, status, date):
     """Return the set of dirac job ids owned by us in `status` since `date`."""
     select_cmd = [
         'dirac-wms-select-jobs',
         '--Status={0}'.format(status),
         '--Owner={0}'.format(header.dirac_name),
         '--Maximum=0',  # 0 lists ALL jobs, which is nice :)
         '--Date={0}'.format(date)
     ]
     raw = util.getOutputCall(select_cmd)
     # The penultimate output line carries the comma-separated id list
     job_ids = set(raw.split("\n")[-2].split(","))
     header.logger.debug(job_ids)
     return job_ids
Exemple #8
0
    def _run_XRSL(self, filename, ce):
        """ Sends XRSL to the queue defined in header """
        from pyHepGrid.src.header import arc_direct

        submit = "arcsub -c {0} {1} -j {2}".format(ce, filename, self.arcbd)
        # Can only use direct in Durham. Otherwise fails!
        # Speeds up submission (according to Stephen)
        if arc_direct and ".dur.scotgrid.ac.uk" in ce:
            submit += " -S org.nordugrid.gridftpjob --direct "
        result = util.getOutputCall(submit.split(), include_return_code=True)
        # result[0] is the command output, result[1] the return code
        jobid = result[0].split("jobid:")[-1].rstrip().strip()
        return jobid, result[1]
Exemple #9
0
 def cat_job(self, jobids, jobinfo, print_stderr=None, store=False):
     """ print standard output of a given job"""
     collected = []
     for jobid in jobids:
         cat_cmd = [self.cmd_print, "-j", header.arcbase, jobid.strip()]
         if print_stderr:
             # -e switches the print command to the stderr stream
             cat_cmd.append("-e")
         if store:
             collected.append(
                 util.getOutputCall(cat_cmd, include_return_code=False))
         else:
             util.spCall(cat_cmd)
     if store:
         return collected
Exemple #10
0
 def get_status(self, status, date):
     """Return the set of dirac job ids owned by us in `status` since `date`."""
     query = [
         'dirac-wms-select-jobs',
         F'--Status={status}',
         F'--Owner={header.dirac_name}',
         '--Maximum=0',  # 0 lists ALL jobs, which is nice :)
         F'--Date={date}'
     ]
     raw = util.getOutputCall(query, include_return_code=False)
     # The penultimate output line carries the comma-separated id list
     result = set(raw.split("\n")[-2].split(","))
     header.logger.debug(result)
     return result
Exemple #11
0
    def _run_XRSL(self, filename, test=False, include_retcode=False):
        """ Sends XRSL to the queue defined in header.

        If test = True, use the test queue; otherwise use the base CE,
        randomised between the two Durham CEs when split_dur_ce is set.
        Returns the jobid, plus the command return code when
        include_retcode is True.
        """
        import random
        from pyHepGrid.src.header import arc_direct
        from pyHepGrid.src.header import split_dur_ce
        if test:
            from pyHepGrid.src.header import ce_test as ce
        else:
            from pyHepGrid.src.header import ce_base as ce
            # Randomise ce at submission time to reduce load
            if split_dur_ce and ".dur.scotgrid.ac.uk" in ce:
                ce = random.choice(
                    ["ce1.dur.scotgrid.ac.uk", "ce2.dur.scotgrid.ac.uk"])

        cmd = "arcsub -c {0} {1} -j {2}".format(ce, filename, self.arcbd)
        print(cmd)
        # Can only use direct in Durham. Otherwise fails!
        # Speeds up submission (according to Stephen)
        if arc_direct and ".dur.scotgrid.ac.uk" in ce:
            cmd += " -S org.nordugrid.gridftpjob --direct "
        if include_retcode:
            # (output, return_code) variant of getOutputCall
            result = util.getOutputCall(cmd.split(), include_return_code=True)
            jobid = result[0].split("jobid:")[-1].rstrip().strip()
            return jobid, result[1]
        else:
            output = util.getOutputCall(cmd.split())
            jobid = output.split("jobid:")[-1].rstrip().strip()
            return jobid
Exemple #12
0
    def _run_SLURM(self,
                   filename,
                   args,
                   queue,
                   test=False,
                   socket=None,
                   n_sockets=1):
        """ Takes a slurm runfile and submits it to the SLURM batch system.

        Returns the jobid and queue used for submission.
        (args, test, socket and n_sockets are accepted for interface
        compatibility but not used here.)"""
        queuetag = "-p {0}".format(queue) if queue is not None else ""
        submit_cmd = "sbatch {0} {1}".format(filename, queuetag)
        header.logger.debug(submit_cmd)
        sbatch_out = util.getOutputCall(submit_cmd.split(),
                                        include_return_code=False)
        # The job id is the last whitespace-separated token of the output
        jobid = sbatch_out.strip().split()[-1]
        return jobid, queue
Exemple #13
0
 def _get_data_warmup(self, db_id):
     """
     Given a database entry, retrieve its data from the warmup folder to the
     folder defined in said database entry For arc jobs stdoutput will be
     downloaded in said folder as well
     """
     # Retrieve data from database
     from pyHepGrid.src.header import arcbase, grid_warmup_dir
     fields = ["runcard", "runfolder", "jobid", "pathfolder"]
     data = self.dbase.list_data(self.table, fields, db_id)[0]
     runfolder = data["runfolder"]
     finfolder = data["pathfolder"] + "/" + runfolder
     runcard = data["runcard"]
     jobids = data["jobid"].split()
     util.spCall(["mkdir", "-p", finfolder])
     logger.info("Retrieving ARC output into " + finfolder)
     try:
         # Retrieve ARC standard output for every job of this run
         for jobid in jobids:
             logger.info(jobid)
             cmd = [self.cmd_get, "-j", arcbase, jobid.strip()]
             output = util.getOutputCall(cmd, include_return_code=False)
             # The download location is printed after "Results stored at: "
             outputfol = output.split("Results stored at: ")[1].rstrip()
             outputfolder = outputfol.split("\n")[0]
             if outputfolder == "" or (len(outputfolder.split(" ")) > 1):
                 logger.info("Running mv and rm command is not safe here")
                 logger.info("Found blank spaces in the output folder")
                 logger.info(
                     "Nothing will be moved to the warmup global folder")
             else:
                 destination = finfolder + "/" + "arc_out_" + runcard + \
                     outputfolder
                 util.spCall(["mv", outputfolder, destination])
                 # util.spCall(["rm", "-rf", outputfolder])
     # Best-effort: any failure retrieving stdout should not stop the
     # warmup download below.  Was `except BaseException`, which also
     # swallowed KeyboardInterrupt/SystemExit - narrowed to Exception.
     except Exception:
         logger.info("Couldn't find job output in the ARC server")
         logger.info("jobid: " + jobid)
         logger.info("Run arcstat to check the state of the job")
         logger.info("Trying to retrieve data from grid storage anyway")
     # Retrieve warmup from the grid storage warmup folder
     wname = self.warmup_name(runcard, runfolder)
     self.gridw.bring(wname, grid_warmup_dir, finfolder + "/" + wname)
Exemple #14
0
 def __check_grid_pdf(self, use_cvmfs=False, cvmfs_loc=""):
     """ Check that the requested PDF set and member are available,
     either in the local .pdfinfo file or in the cvmfs LHAPDF install.
     """
     import json
     infofile = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             ".pdfinfo")
     pdf, member = self.parse_pdf_entry()
     if not use_cvmfs:
         try:
             with open(infofile, "r") as f:
                 data = json.load(f)
                 try:
                     members = data[pdf]
                     self.debug("PDF set found")
                 except KeyError:
                     self.critical(
                         "PDF set {0} is not included in currently "
                         "initialised version of LHAPDF".format(pdf))
                 # Explicit check instead of `assert`, which would be
                 # silently stripped (and the check skipped) under -O.
                 if int(member) in members:
                     self.debug("PDF member found")
                 else:
                     self.critical(
                         "PDF member {1} for PDF set {0} is not included in "
                         "currently initialised version of LHAPDF".format(
                             pdf, member))
         except FileNotFoundError:
             self.warning("No PDF info file found. Skipping check.")
     else:
         sharedir = "{0}/share/LHAPDF/".format(cvmfs_loc)
         bindir = "{0}/bin/".format(cvmfs_loc)
         # Point LHAPDF at the cvmfs data directory before querying it
         os.environ["LHA_DATA_PATH"] = sharedir
         os.environ["LHAPATH"] = sharedir
         cvmfs_pdfs = util.getOutputCall(
             [bindir + "lhapdf", "ls", "--installed"])
         cvmfs_pdfs = [i.strip() for i in cvmfs_pdfs.split()]
         if pdf not in cvmfs_pdfs:
             self.critical("PDF set {0} is not included in cvmfs LHAPDF. "
                           "Turn cvmfs PDF off and use your own one "
                           "(or ask the admins nicely...".format(pdf))
         else:
             self.debug("PDF set found in cvmfs LHAPDF setup")
Exemple #15
0
 def _do_stats_job(self, jobid_raw):
     """ version of stats job multithread ready
     """
     # Tuple input carries (jobid, cached_status); terminal cached
     # statuses are returned without querying the batch system again.
     if isinstance(jobid_raw, tuple):
         if jobid_raw[1] in (self.cDONE, self.cFAIL):
             return jobid_raw[1]
         jobid = jobid_raw[0]
     else:
         jobid = jobid_raw
     cmd = [self.cmd_stat, jobid.strip(), "-j", header.arcbase]
     stat_text = util.getOutputCall(cmd, suppress_errors=True)
     if "Done" in stat_text or "Finished" in stat_text:
         return self.cDONE
     if "Waiting" in stat_text or "Queuing" in stat_text:
         return self.cWAIT
     if "Running" in stat_text:
         return self.cRUN
     if "Failed" in stat_text:
         return self.cFAIL
     return self.cUNK