def check_for_existing_output(self, r, rname):
    """
    Look for output of this runcard in the grid output directory and
    delete the files whose seed collides with the planned submission.

    The seed window is ``baseSeed .. baseSeed + producRun - 1``; both
    values are imported from ``pyHepGrid.src.header``, and only when the
    ``<r>-<rname>`` tag is found in the directory listing. The user is
    prompted once, before the first deletion.

    NOTE(review): another definition with this same name follows later in
    this file. If both live in the same class the later one takes effect;
    confirm which variant is intended.

    Arguments:
        r: runcard name
        rname: run folder / tag that goes with the runcard
    """
    from pyHepGrid.src.header import grid_output_dir, logger

    logger.debug(F"Checking whether runcard {rname} has output for seeds "
                 "that you are trying to submit...")

    checkname = r + "-" + rname
    remote_listing = self.gridw.get_dir_contents(grid_output_dir)

    if checkname in remote_listing:
        # Seed configuration is only needed once we know there is
        # something to look for.
        from pyHepGrid.src.header import baseSeed, producRun

        # Lazy generator: each output name is computed right before it is
        # checked, exactly as in a plain loop over the seeds.
        candidates = (self.output_name(r, rname, seed)
                      for seed in range(baseSeed, baseSeed + producRun))

        user_confirmed = False
        for candidate in candidates:
            if candidate not in remote_listing:
                continue
            if not user_confirmed:
                # Ask only once, before the first deletion.
                self._press_yes_to_continue(
                    "This runcard already has at least one output file "
                    "at gfal:output with a seed you are trying to "
                    F"submit (looked for '{checkname}').\n"
                    "If you continue, it will be removed now.")
                logger.warning(
                    F"Runcard {r} has at least one file at output")
                user_confirmed = True
            self.gridw.delete(candidate, grid_output_dir)

    logger.info("Output check complete")
def check_for_existing_output(self, r, rname):
    """
    Check the grid output directory for files produced by this runcard
    with any of the seeds about to be submitted, and delete them.

    Seeds run from ``baseSeed`` to ``baseSeed + producRun - 1``; both
    values are read from ``pyHepGrid.src.header``, and only when the
    ``<r>-<rname>`` tag appears in the directory listing. A single
    confirmation prompt is shown before the first file is removed.

    NOTE(review): an earlier definition with this same name exists in this
    file. If both live in the same class this later one takes effect;
    confirm which variant is intended.

    Arguments:
        r: runcard name
        rname: run folder / tag that goes with the runcard
    """
    from pyHepGrid.src.header import grid_output_dir, logger

    debug_template = ("Checking whether runcard {0} has output for seeds "
                      "that you are trying to submit...")
    logger.debug(debug_template.format(rname))

    checkname = r + "-" + rname
    listing = self.gridw.get_dir_contents(grid_output_dir)

    if checkname in listing:
        from pyHepGrid.src.header import baseSeed, producRun

        prompt_pending = True
        for seed in range(baseSeed, baseSeed + producRun):
            output_file = self.output_name(r, rname, seed)
            if output_file not in listing:
                continue
            if prompt_pending:
                # Only the first clash triggers the prompt and the warning.
                prompt = ("It seems this runcard already has at least one "
                          "file at lfn:output with a seed you are trying "
                          "to submit (looked for {}). Do you want to "
                          "remove it/them?")
                self._press_yes_to_continue(prompt.format(checkname))
                logger.warning(
                    "Runcard {0} has at least one file at output".format(r))
                prompt_pending = False
            self.gridw.delete(output_file, grid_output_dir)

    logger.info("Output check complete")
def check_for_existing_output_local(self, r, rname, baseSeed, producRun):
    """
    Check whether the given runcard already has output in the local run
    directory by looking at the log files stored there.

    A log file is considered when its name ends in ``.log`` and contains
    the runcard ``id``. Its seed is taken from the ``s<digits>.<ext>``
    suffix of the name. If any such seed falls inside
    ``range(baseSeed, baseSeed + producRun)`` a warning is logged and the
    user is asked to confirm before continuing.

    Arguments:
        r: runcard file name (looked up inside the local run directory)
        rname: run folder / tag that goes with the runcard
        baseSeed: first seed of the planned submission
        producRun: number of consecutive seeds to be submitted
    """
    import re

    logger.debug(
        "Checking whether runcard {0} has output for seeds that you are "
        "trying to submit...".format(rname))
    local_dir_name = self.get_local_dir_name(r, rname)
    files = os.listdir(local_dir_name)
    runcard = PROGRAMruncard(runcard_file=os.path.join(local_dir_name, r),
                             logger=logger, grid_run=False,
                             use_cvmfs=header.use_cvmfs_lhapdf,
                             cvmfs_loc=header.cvmfs_lhapdf_location)
    runcard_id = runcard.runcard_dict_case_preserving["id"]

    # NOTE(review): the leading "." is unescaped and therefore matches any
    # character before the "s"; presumably a literal dot was intended.
    # Pattern left unchanged to keep accepting the same file names.
    logseed_regex = re.compile(r".s([0-9]+)\.[^\.]+$")

    existing_seeds = set()
    for fname in files:
        if not (fname.endswith(".log") and runcard_id in fname):
            continue
        seed_match = logseed_regex.search(fname)
        if seed_match is None:
            # Log file for this runcard id without a seed tag in its name:
            # it cannot clash with a seeded submission, so skip it instead
            # of failing on `None.group`.
            continue
        existing_seeds.add(int(seed_match.group(1)))

    submission_seeds = set(range(baseSeed, baseSeed + producRun))
    overlap = existing_seeds & submission_seeds
    if overlap:
        # Sorted so the reported seeds come out in a stable order.
        logger.warning(
            "Log files for seed(s) {0} already exist in run folders. "
            "Running will overwrite the logfiles already present.".format(
                " ".join(str(i) for i in sorted(overlap))))
        self._press_yes_to_continue(None)
def __check_pulled_warmup(self, success, tmpnm, warmup_extensions):
    """
    Validate a warmup archive after it has been pulled.

    When `success` is falsy it is returned untouched and nothing is
    inspected. Otherwise `self.tarw.check_filesizes` is asked for the
    entries of `tmpnm` matching `warmup_extensions` together with their
    sizes; the check fails (returns False, with a warning) when there is
    no match at all or when any reported size is zero.

    Arguments:
        success: outcome of the preceding pull
        tmpnm: name of the pulled archive to inspect
        warmup_extensions: extensions identifying warmup files
    Returns:
        False if the archive holds no, or an empty, warmup file;
        `success` otherwise.
    """
    if not success:
        return success

    matches, sizes = self.tarw.check_filesizes(tmpnm, warmup_extensions)

    if len(matches) == 0:
        logger.warning("No warmup file found on main Grid Storage")
        return False

    for file_size in sizes:
        if file_size == 0:
            logger.warning("Empty warmup file found on Grid Storage")
            return False

    return success
def check_warmup_files(self, db_id, rcard, resubmit=False):
    """
    Provides stats on whether a warmup file exists for a given run and
    optionally resubmit if absent.

    The check is done from inside a scratch directory. The original
    working directory is always restored and the scratch directory is
    always removed, even if the check raises.

    Arguments:
        db_id: database id of the run to check
        rcard: runcard passed on to ``runWrapper`` when resubmitting
        resubmit: if True, resubmit the job when the warmup is not
            "Present" and the job has no running or waiting entries
    """
    import shutil
    import tarfile
    import tempfile

    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()
    try:
        os.chdir(tmpdir)
        logger.debug("Temporary directory: {0}".format(tmpdir))
        # NOTE(review): the return value is not used in this method; the
        # call is kept in case it has side effects. Confirm and drop if not.
        util.expandCard()
        tags = ["runcard", "runfolder"]
        runcard_info = self.dbase.list_data(self.table, tags, db_id)[0]
        runcard = runcard_info["runcard"]
        rname = runcard_info["runfolder"]
        # Status strings are wrapped in ANSI colour escape sequences.
        try:
            warmup_files = self._bring_warmup_files(runcard, rname,
                                                    check_only=True)
            if warmup_files == []:
                status = "\033[93mMissing\033[0m"
            else:
                status = "\033[92mPresent\033[0m"
        except tarfile.ReadError:
            status = "\033[91mCorrupted\033[0m"
        run_id = "{0}-{1}:".format(runcard, rname)
        logger.info("[{0}] {2:55} {1:>20}".format(db_id, status, run_id))
    finally:
        # Leave the scratch directory before deleting it; mkdtemp() never
        # cleans up after itself.
        os.chdir(origdir)
        shutil.rmtree(tmpdir, ignore_errors=True)

    if resubmit and "Present" not in status:
        done, wait, run, fail, unk = self.stats_job(db_id, do_print=False)
        if run + wait > 0:  # Be more careful in case of unknown status
            logger.warning("Job still running. Not resubmitting for now")
        else:
            # Need to override dictCard for single job submission
            expandedCard = ([runcard], {runcard: rname})
            logger.info(
                "Warmup not found and job ended. Resubmitting to ARC")
            from pyHepGrid.src.runArcjob import runWrapper
            runWrapper(rcard, expandedCard=expandedCard)