def include_arguments(self, current_arguments):
    """ Capture the arguments for slurm and a few custom ones """
    # Get the number of threads
    number_threads = current_arguments.get('threads', 1)
    # Check whether the runcard is Global or DIS
    runcard = current_arguments['runcard']
    runfolder = current_arguments['runcard_dir']
    runcard_path = f"{runfolder}/{runcard}"
    import yaml
    with open(runcard_path, 'r') as f:
        rdata = yaml.safe_load(f)
    experiments = rdata['experiments']
    mem_req = {'dis': 4e3, 'had': 16e3}
    mode = 'dis'
    # If any of the datasets include 'LHC', 'ATLAS' or 'CMS',
    # move to hadronic mode
    for exp in experiments:
        datasets = exp['datasets']
        for d in datasets:
            dname = d['dataset']
            if 'ATLAS' in dname or 'LHC' in dname or 'CMS' in dname:
                logger.info("Hadronic datasets found, setting hadronic mode")
                mode = 'had'
                break
        if mode == 'had':
            break
    memory_per_thread = int(mem_req[mode] / number_threads)
    current_arguments['memsize'] = memory_per_thread
    return current_arguments
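# A minimal worked example of the memory scaling above (the numbers follow
# directly from the mem_req table and the integer division; the argument
# values and the `backend` name are hypothetical): a hadronic runcard
# submitted with threads=4 gets memsize = int(16e3 / 4) = 4000 MB per thread,
# while a DIS runcard with the same thread count gets int(4e3 / 4) = 1000 MB.
#
#   args = {'threads': 4, 'runcard': 'fit.yml', 'runcard_dir': '/runcards'}
#   args = backend.include_arguments(args)
#   args['memsize']   # -> 4000 if the runcard contains LHC/ATLAS/CMS datasets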
def init_production(self, provided_warmup=None, continue_warmup=False,
                    local=True):
    """ Initialize production (single mode) """
    if not local:
        raise NotImplementedError(
            "N3FIT is only implemented for local running")
    from shutil import copy
    from pyHepGrid.src.header import runcardDir
    _, expanded_card = util.expandCard()
    for runcard, runfolder in expanded_card.items():
        running_folder = self.get_local_dir_name(runcard, runfolder)
        stdout_folder = f"{running_folder}/stdout"
        # Create the folder in which the run will take place, and its stdout
        if not util.checkIfThere(stdout_folder):
            os.makedirs(stdout_folder)
        # Now create also the replica folder to avoid future clashes
        rep_folder = self.replica_folder(running_folder, runname=runfolder)
        if not util.checkIfThere(rep_folder):
            os.makedirs(rep_folder)
        # And finally copy the runcard
        runcard_path = f"{runcardDir}/{runcard}"
        copy(runcard_path, running_folder)
    logger.info("run initialized")
def get_id(self, db_id):
    """ Returns a list of DIRAC/ARC jobids for a given database entry """
    jobid = self.dbase.list_data(self.table, ["jobid"], db_id)
    try:
        idout = jobid[0]['jobid']
    except IndexError:
        logger.info("Selected job %s is out of bounds" % jobid)
        idt = input("> Select id to act upon: ")
        idout = self.get_id(idt)
    jobid_list = idout.split(" ")
    if self.act_only_on_done:
        new_list = []
        status_list = self._get_old_status(db_id)
        if status_list:
            for jobid, stat in zip(jobid_list, status_list):
                if stat == self.cDONE:
                    new_list.append(jobid)
            return new_list
        else:
            logger.critical(
                "In order to act only on 'done' jobs you need to have that "
                "info in the db!")
    else:
        return jobid_list
def check_for_existing_output(self, r, rname):
    """ Check whether a given runcard already has output in the grid storage.
    Needs testing, as it must be able to remove (many) output files for a
    production run. It relies on the base seed from the src.header file to
    remove the output.
    """
    from pyHepGrid.src.header import grid_output_dir, logger
    logger.debug(f"Checking whether runcard {rname} has output for seeds "
                 "that you are trying to submit...")
    checkname = r + "-" + rname
    files = self.gridw.get_dir_contents(grid_output_dir)
    first = True
    if checkname in files:
        from pyHepGrid.src.header import baseSeed, producRun
        for seed in range(baseSeed, baseSeed + producRun):
            filename = self.output_name(r, rname, seed)
            if filename in files:
                if first:
                    self._press_yes_to_continue(
                        "This runcard already has at least one output file "
                        "at gfal:output with a seed you are trying to "
                        f"submit (looked for '{checkname}').\n"
                        "If you continue, it will be removed now.")
                    logger.warning(
                        f"Runcard {r} has at least one file at output")
                    first = False
                self.gridw.delete(filename, grid_output_dir)
    logger.info("Output check complete")
def _get_local_warmup_name(self, matchname, provided_warmup):
    from shutil import copy
    exclude_patterns = [".txt", ".log"]
    if os.path.isdir(provided_warmup):
        matches = []
        potential_files = os.listdir(provided_warmup)
        for potfile in potential_files:
            if potfile.lower().startswith(matchname) \
                    and not any(potfile.endswith(p) for p in exclude_patterns):
                matches.append(potfile)
        if len(matches) > 1:
            logger.critical("Multiple warmup matches found in {1}: {0}".format(
                " ".join(matches), provided_warmup))
        elif len(matches) == 0:
            logger.critical(
                "No warmup matches found in {0}.".format(provided_warmup))
        else:
            match = os.path.join(provided_warmup, matches[0])
    else:
        match = provided_warmup
    logger.info("Using warmup {0}".format(match))
    if match not in os.listdir(sys.path[0]):
        local_match = False
        copy(match, os.path.basename(match))
        match = os.path.basename(match)
    else:
        local_match = True
    return match, local_match
def check_for_existing_output(self, r, rname):
    """ Check whether a given runcard already has output in the grid storage.
    Needs testing, as it must be able to remove (many) output files for a
    production run. It relies on the base seed from the src.header file to
    remove the output.
    """
    from pyHepGrid.src.header import grid_output_dir, logger
    logger.debug("Checking whether runcard {0} has output for seeds that "
                 "you are trying to submit...".format(rname))
    checkname = r + "-" + rname
    files = self.gridw.get_dir_contents(grid_output_dir)
    first = True
    if checkname in files:
        from pyHepGrid.src.header import baseSeed, producRun
        for seed in range(baseSeed, baseSeed + producRun):
            filename = self.output_name(r, rname, seed)
            if filename in files:
                if first:
                    self._press_yes_to_continue(
                        "It seems this runcard already has at least one file "
                        "at lfn:output with a seed you are trying to submit "
                        "(looked for {}). Do you want to remove "
                        "it/them?".format(checkname))
                    logger.warning(
                        "Runcard {0} has at least one file at output".format(r))
                    first = False
                self.gridw.delete(filename, grid_output_dir)
    logger.info("Output check complete")
def get_date(self, db_id):
    """ Returns the date from a given database entry """
    jobid = self.dbase.list_data(self.table, ["date"], db_id)
    try:
        idout = jobid[0]['date']
    except IndexError:
        logger.info("Selected job %s is out of bounds" % jobid)
        idt = input("> Select id to act upon: ")
        idout = self.get_date(idt)
    return idout
def generic_initialise(runcard, warmup=False, production=False, grid=None,
                       overwrite_grid=False, local=False):
    logger.info("Initialising runcard: {0}".format(runcard))
    back = Backend()
    if warmup:
        if overwrite_grid:
            back.set_overwrite_warmup()
        back.init_warmup(grid, continue_warmup=overwrite_grid, local=local)
    elif production:
        back.init_production(grid, continue_warmup=overwrite_grid, local=local)
    else:
        logger.critical("Neither warmup nor production was selected. "
                        "What do you want me to initialise?")
def _do_get_data(self, filename):
    """ Multithread wrapper used in get_data_production
    to download information from the grid storage
    """
    local_name = filename.replace("output", "")
    local_file = self.rfolder + "/" + local_name
    self.gridw.bring(filename, header.grid_output_dir, local_file,
                     timeout=header.timeout)
    if os.path.isfile(local_name):
        global counter
        if counter:
            counter.value += 1
            logger.info("Downloaded {0} files".format(counter.value))
        return local_name
    else:
        return None
def _do_extract_outputData(self, tarfile):
    """ Multithread wrapper used in get_data_production
    for untarring files
    """
    # It assumes the log and dat folders are already there
    if not os.path.isfile(tarfile):
        logger.info("{0} not found".format(tarfile))
        return -1
    out_dict = {".log": "log/", ".dat": "dat/"}
    self.tarw.extract_extension_to(tarfile, out_dict)
    util.spCall(["rm", tarfile])
    return 0
def check_warmup_files(self, db_id, rcard, resubmit=False):
    """ Provides stats on whether a warmup file exists for a given run and
    optionally resubmits the job if the warmup is absent
    """
    import tempfile
    import tarfile
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()
    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))
    rncards, dCards = util.expandCard()
    tags = ["runcard", "runfolder"]
    runcard_info = self.dbase.list_data(self.table, tags, db_id)[0]
    runcard = runcard_info["runcard"]
    rname = runcard_info["runfolder"]
    try:
        warmup_files = self._bring_warmup_files(runcard, rname,
                                                check_only=True)
        if warmup_files == []:
            status = "\033[93mMissing\033[0m"
        else:
            status = "\033[92mPresent\033[0m"
    except tarfile.ReadError:
        status = "\033[91mCorrupted\033[0m"
    run_id = "{0}-{1}:".format(runcard, rname)
    logger.info("[{0}] {2:55} {1:>20}".format(db_id, status, run_id))
    os.chdir(origdir)

    if resubmit and "Present" not in status:
        done, wait, run, fail, unk = self.stats_job(db_id, do_print=False)
        if run + wait > 0:
            # Be more careful in case of unknown status
            logger.warning("Job still running. Not resubmitting for now")
        else:
            # Need to override dictCard for single job submission
            expandedCard = ([runcard], {runcard: rname})
            logger.info("Warmup not found and job ended. Resubmitting to ARC")
            from pyHepGrid.src.runArcjob import runWrapper
            runWrapper(rcard, expandedCard=expandedCard)
def do_management(args, rcard):
    #### Management of running/finished jobs
    backend_setups = {
        "runArc": {"backend": bm.Arc, "kwargs": {"production": False}},
        "runArcProduction": {"backend": bm.Arc, "kwargs": {"production": True}},
        "runDirac": {"backend": bm.Dirac, "kwargs": {}},
        "runSlurm": {"backend": bm.Slurm, "kwargs": {"production": False}},
        "runSlurmProduction": {"backend": bm.Slurm,
                               "kwargs": {"production": True}},
    }
    for _backend in backend_setups:
        if getattr(args, _backend):  # If this mode is selected
            backend_opt = backend_setups[_backend]
            kwargs = backend_opt["kwargs"]
            backend = backend_opt["backend"](act_only_on_done=args.done,
                                             **kwargs)
            logger.info("{0}".format(backend))
            mr.management_routine(backend, args)
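# Hedged usage sketch: with argparse-style flags, e.g. args.runSlurmProduction
# set and the other mode flags unset, the loop above builds
# bm.Slurm(act_only_on_done=args.done, production=True) and hands it to
# mr.management_routine(backend, args). Only one mode flag is expected per
# invocation; if several were set, each matching backend would be run in turn.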
def get_grid_from_stdout(self, jobid, jobinfo):
    from pyHepGrid.src.header import default_runfolder
    import re
    stdout = "\n".join(self.cat_job(jobid, jobinfo, store=True))
    try:
        gridname = [i for i in stdout.split("\n")
                    if "Writing grid" in i][0].split()[-1].strip()
        logger.info("Grid name from stdout: {0}".format(gridname))
    except IndexError:
        logger.critical("No grid filename found in stdout logs. "
                        "Did the warmup write a grid?")

    # Thanks StackOverflow
    result = re.search('vegas warmup to stdout(.*)End', stdout, flags=re.S)
    try:
        grid = result.group(1)
    except AttributeError:
        # re.search returns None when the markers are not found
        logger.critical("No grid found in stdout logs. "
                        "Did the warmup write a grid?")
    logger.info("Grid extracted successfully")

    if default_runfolder is None:
        base = header.warmup_base_dir
    else:
        base = header.default_runfolder
    outloc = os.path.join(base, jobinfo["runfolder"], jobinfo["runcard"])
    grid_fname = os.path.join(outloc, gridname)
    os.makedirs(outloc, exist_ok=True)
    if os.path.exists(grid_fname):
        self._press_yes_to_continue(
            " \033[93m WARNING:\033[0m Grid file already exists at {0}. "
            "Do you want to overwrite it?".format(grid_fname))
    with open(grid_fname, "w") as gridfile:
        gridfile.write(grid)
    logger.info("Grid written locally to {0}".format(
        os.path.relpath(grid_fname)))
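# Hedged illustration of the stdout layout the extraction above assumes. The
# marker strings come from the code itself; the grid filename and surrounding
# lines are invented for the example:
#
#   Writing grid to example.y1
#   vegas warmup to stdout
#   <grid contents>
#   End
#
# The "Writing grid" line yields gridname ("example.y1"), and result.group(1)
# captures everything between the two markers, which is written to grid_fname.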
def check_runcard_multichannel(self, runcard_obj):
    try:
        multichannel_val = runcard_obj.runcard_dict["run"]["multi_channel"]
        if multichannel_val.lower() == ".true.":
            logger.info("Multichannel switched ON in runcard")
            multichannel = True
        else:
            multichannel = False
            logger.info("Multichannel switched OFF in runcard")
    except KeyError:
        multichannel = False
        logger.info("Multichannel not enabled in runcard")
    return multichannel
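# For reference, the check above expects the parsed runcard to expose a "run"
# block whose "multi_channel" entry is a Fortran-style logical string, i.e.
# runcard_obj.runcard_dict["run"]["multi_channel"] == ".true." switches
# multichannel handling on, while ".false." (or a missing key) leaves it off.
# The runcard syntax that produces this dictionary is program-specific.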
def check_for_existing_warmup(self, r, rname):
    logger.info("Checking for prior warmup output which this warmup run "
                "will overwrite...")
    checkname = self.warmup_name(r, rname)
    if self.gridw.checkForThis(checkname, grid_warmup_dir):
        self._press_yes_to_continue(
            f"Prior warmup output file {checkname} already exists at "
            f"gfal:~/{grid_warmup_dir}. Do you want to remove it?")
        self.gridw.delete(checkname, grid_warmup_dir)
    else:
        logger.info("None found.")

    logger.info("Checking for prior socket warmup backups which this warmup "
                "run will overwrite...")
    checkname = self.warmup_sockets_dirname(r, rname)
    if self.gridw.checkForThis(checkname, grid_warmup_dir):
        self._press_yes_to_continue(
            f"Prior socketed warmup backups {checkname} exist at "
            f"gfal:~/{grid_warmup_dir}. Do you want to remove the directory "
            "and its contents?")
        self.gridw.delete_directory(checkname, grid_warmup_dir)
    else:
        logger.info("None found.")
def init_warmup(self, provided_warmup=None, continue_warmup=False,
                local=False):
    """ Initialises a warmup run. A warmup file can be provided and it will
    be added to the .tar file sent to the grid storage.
    Steps are:
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage
    """
    from shutil import copy
    import tempfile
    from pyHepGrid.src.header import executable_src_dir, executable_exe, \
        logger
    from pyHepGrid.src.header import runcardDir as runFol

    if local:
        self.init_local_warmups(provided_warmup=provided_warmup,
                                continue_warmup=continue_warmup)
        return

    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()

    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)

    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))

    rncards, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir, executable_exe)
    if not os.path.isfile(path_to_exe_full):
        logger.critical(
            "Could not find executable at {0}".format(path_to_exe_full))
    copy(path_to_exe_full, os.getcwd())
    files = [executable_exe]
    for idx, i in enumerate(rncards):
        logger.info("Initialising {0} [{1}/{2}]".format(
            i, idx + 1, len(rncards)))
        local = False
        warmupFiles = []
        # Check whether warmup/production is active in the runcard
        runcard_file = os.path.join(runFol, i)
        if not os.path.isfile(runcard_file):
            self._press_yes_to_continue(
                "Could not find runcard {0}".format(i),
                error="Could not find runcard")
        runcard_obj = PROGRAMruncard(runcard_file, logger=logger,
                                     use_cvmfs=header.use_cvmfs_lhapdf,
                                     cvmfs_loc=header.cvmfs_lhapdf_location)
        self._check_warmup(runcard_obj, continue_warmup)
        multichannel = self.check_runcard_multichannel(runcard_obj)

        if provided_warmup:
            # Copy warmup to current dir if not already there
            match, local = self._get_local_warmup_name(
                runcard_obj.warmup_filename(), provided_warmup)
            files += [match]
        rname = dCards[i]
        tarfile = i + rname + ".tar.gz"
        copy(os.path.join(runFol, i), os.getcwd())
        if self.overwrite_warmup:
            checkname = self.warmup_name(i, rname)
            if self.gridw.checkForThis(checkname, header.grid_warmup_dir):
                logger.info("Warmup found in GFAL:{0}!".format(
                    header.grid_warmup_dir))
                warmup_files = self._bring_warmup_files(
                    i, rname, shell=True, multichannel=multichannel)
                files += warmup_files
                logger.info("Warmup files found: {0}".format(
                    " ".join(warmup_files)))

        self.tarw.tarFiles(files + [i], tarfile)
        if self.gridw.checkForThis(tarfile, header.grid_input_dir):
            # Could we cache this? Just to speed up ini
            logger.info("Removing old version of {0} from Grid "
                        "Storage".format(tarfile))
            self.gridw.delete(tarfile, header.grid_input_dir)
        logger.info("Sending {0} to gfal {1}/".format(
            tarfile, header.grid_input_dir))
        self.gridw.send(tarfile, header.grid_input_dir, shell=True)

        if not local:
            for j in warmupFiles:
                os.remove(j)
        os.remove(i)
        os.remove(tarfile)
    os.remove(executable_exe)
    os.chdir(origdir)
def init_production(self, provided_warmup=None, continue_warmup=False,
                    local=False):
    """ Initialises a production run. If a warmup file is provided, the
    retrieval step is skipped.
    Steps are:
        0 - Retrieve warmup from the grid/local
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage
    """
    from shutil import copy
    import tempfile
    from pyHepGrid.src.header import runcardDir as runFol
    from pyHepGrid.src.header import executable_exe, executable_src_dir, \
        logger

    if local:
        self.init_local_production(provided_warmup=provided_warmup)
        return

    rncards, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir, executable_exe)
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()

    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)

    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))

    if not os.path.isfile(path_to_exe_full):
        logger.critical(
            "Could not find executable at {0}".format(path_to_exe_full))
    copy(path_to_exe_full, os.getcwd())
    files = [executable_exe]
    for idx, i in enumerate(rncards):
        logger.info("Initialising {0} [{1}/{2}]".format(
            i, idx + 1, len(rncards)))
        local = False
        # Check whether warmup/production is active in the runcard
        runcard_file = os.path.join(runFol, i)
        runcard_obj = PROGRAMruncard(runcard_file, logger=logger,
                                     use_cvmfs=header.use_cvmfs_lhapdf,
                                     cvmfs_loc=header.cvmfs_lhapdf_location)
        multichannel = self.check_runcard_multichannel(runcard_obj)
        self._check_production(runcard_obj)
        rname = dCards[i]
        tarfile = i + rname + ".tar.gz"
        copy(os.path.join(runFol, i), os.getcwd())

        if provided_warmup:
            match, local = self._get_local_warmup_name(
                runcard_obj.warmup_filename(), provided_warmup)
            warmupFiles = [match]
        elif header.provided_warmup_dir:
            match, local = self._get_local_warmup_name(
                runcard_obj.warmup_filename(), header.provided_warmup_dir)
            warmupFiles = [match]
        else:
            logger.info("Retrieving warmup file from grid")
            warmupFiles = self._bring_warmup_files(
                i, rname, shell=True, multichannel=multichannel)

        self.tarw.tarFiles(files + [i] + warmupFiles, tarfile)
        if self.gridw.checkForThis(tarfile, header.grid_input_dir):
            logger.info("Removing old version of {0} from Grid "
                        "Storage".format(tarfile))
            self.gridw.delete(tarfile, header.grid_input_dir)
        logger.info("Sending {0} to GFAL {1}/".format(
            tarfile, header.grid_input_dir))
        self.gridw.send(tarfile, header.grid_input_dir, shell=True)

        if local:
            util.spCall(["rm", i, tarfile])
        else:
            util.spCall(["rm", i, tarfile] + warmupFiles)

    os.remove(executable_exe)
    os.chdir(origdir)
def init_production(self, provided_warmup=None, continue_warmup=False,
                    local=False):
    """ Initialises a production run. If a warmup file is provided, the
    retrieval step is skipped.
    Steps are:
        0 - Retrieve warmup from the grid/local
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage
    """
    import tempfile
    from pyHepGrid.src.header import runcardDir as runFol
    from pyHepGrid.src.header import executable_exe, executable_src_dir, \
        grid_input_dir

    if local:
        self.init_local_production(provided_warmup=provided_warmup)
        return

    rncards, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir, executable_exe)
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()

    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)

    if provided_warmup:
        warmup_base = provided_warmup
    elif header.provided_warmup_dir:
        warmup_base = header.provided_warmup_dir
    else:
        # print("Retrieving warmup file from grid")
        # warmupFiles = self._bring_warmup_files(i, dCards[i], shell=True)
        logger.critical("Retrieving warmup file from grid: Not implemented")

    # setup LHAPDF
    if header.use_cvmfs_lhapdf:
        os.environ['LHAPDF_DATA_PATH'] = header.cvmfs_lhapdf_location

    # create Process dir in Sherpa
    self._init_Sherpa(warmup_base, rncards)

    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))

    # if not os.path.isfile(path_to_exe_full):
    #     logger.critical(
    #         "Could not find executable at {0}".format(path_to_exe_full))
    # copy(path_to_exe_full, os.getcwd())
    # files = [executable_exe]

    for idx, i in enumerate(rncards):
        local = False
        tarfile = i + "+" + dCards[i] + ".tar.gz"
        base_folder = i.split("-")[0] + "/"
        logger.info("Initialising {0} to {1} [{2}/{3}]".format(
            i, tarfile, idx + 1, len(rncards)))

        # runcards
        run_dir = runFol + base_folder
        runFiles = [dCards[i] + ".yml"]
        for f in runFiles:
            os.system("cp -r " + run_dir + f + " " + tmpdir)

        # warmup files
        warmupFiles = ["Process", "Run.dat", "Results.db"]
        for f in warmupFiles:
            os.system("cp -r " + warmup_base + base_folder + f + " " + tmpdir)

        # tar up & send to grid storage
        self.tarw.tarFiles(warmupFiles + runFiles, tarfile)
        if self.gridw.checkForThis(tarfile, grid_input_dir):
            logger.info("Removing old version of {0} from Grid "
                        "Storage".format(tarfile))
            self.gridw.delete(tarfile, grid_input_dir)
        logger.info("Sending {0} to {1}".format(tarfile, grid_input_dir))
        self.gridw.send(tarfile, grid_input_dir, shell=True)

    # clean up afterwards
    os.chdir(origdir)
    os.system("rm -r " + tmpdir)
def _get_data_production(self, db_id):
    """ Given a database entry, retrieve its data from the output folder
    to the folder defined in said db entry
    """
    logger.info("You are going to download all folders corresponding to "
                "this runcard from the grid output")
    logger.info("Make sure all runs are finished using the -s or -S options!")
    fields = ["runfolder", "runcard", "jobid", "pathfolder", "iseed"]
    data = self.dbase.list_data(self.table, fields, db_id)[0]
    self.rcard = data["runcard"]
    self.rfolder = data["runfolder"]
    pathfolderTp = data["pathfolder"]
    initial_seed = data["iseed"]
    pathfolder = util.sanitiseGeneratedPath(pathfolderTp, self.rfolder)
    jobids = data["jobid"].split(" ")
    finalSeed = int(initial_seed) + len(jobids)
    if initial_seed == "None":
        initial_seed = self.bSeed
    else:
        initial_seed = int(initial_seed)
    while True:
        firstName = self.output_name(self.rcard, self.rfolder, initial_seed)
        finalName = self.output_name(self.rcard, self.rfolder, finalSeed)
        logger.info("The starting filename is {}".format(firstName))
        logger.info("The final filename is {}".format(finalName))
        yn = self._press_yes_to_continue("If you are ok with this, press y",
                                         fallback=-1)
        if yn == 0:
            break
        initial_seed = int(input(
            "Please, introduce the starting seed (ex: 400): "))
        finalSeed = int(input(
            "Please, introduce the final seed (ex: 460): "))
    try:
        os.makedirs(self.rfolder)
    except OSError as err:
        if err.errno == 17:
            logger.info("Tried to create folder {0} in this directory to no "
                        "avail. We are going to assume the directory was "
                        "already there".format(self.rfolder))
            self._press_yes_to_continue(
                "", "Folder {} already exists".format(self.rfolder))
        else:
            raise
    os.chdir(self.rfolder)
    try:
        os.makedirs("log")
        os.makedirs("dat")
    except OSError:
        # todo: this is effectively mkdir -p, the folders may already exist
        pass
    seeds = range(initial_seed, finalSeed)
    # If we only act on a subrange of jobids (i.e. the ones which are done),
    # choose only those seeds
    if self.act_only_on_done:
        old_status = self._get_old_status(db_id)
        if old_status:
            new_seed = []
            for seed, stat in zip(seeds, old_status):
                if stat == self.cDONE:
                    new_seed.append(seed)
            seeds = new_seed

    from pyHepGrid.src.header import finalise_no_cores as n_threads
    # Check which of the seeds actually produced some data
    all_remote = self.output_name_array(self.rcard, self.rfolder, seeds)
    all_output = self.gridw.get_dir_contents(header.grid_output_dir).split()
    remote_tarfiles = list(set(all_remote) & set(all_output))
    logger.info("Found data for {0} of the {1} seeds.".format(
        len(remote_tarfiles), len(seeds)))

    # Download said data
    tarfiles = self._multirun(self._do_get_data, remote_tarfiles, n_threads,
                              use_counter=True)
    tarfiles = list(filter(None, tarfiles))
    logger.info("Downloaded {0} files, extracting...".format(len(tarfiles)))

    # Extract some information from the first tarfile
    for tarfile in tarfiles:
        if self._extract_output_warmup_data(tarfile):
            break

    # Extract all
    dummy = self._multirun(self._do_extract_outputData, tarfiles, n_threads)
    os.chdir("..")
    logger.info("Everything saved at {0}".format(pathfolder))
    util.spCall(["mv", self.rfolder, pathfolder])
def _get_data_warmup(self, db_id):
    """ Given a database entry, retrieve its data from the warmup folder
    to the folder defined in said database entry.
    For ARC jobs, the standard output will be downloaded into that folder
    as well.
    """
    # Retrieve data from database
    from pyHepGrid.src.header import arcbase, grid_warmup_dir
    fields = ["runcard", "runfolder", "jobid", "pathfolder"]
    data = self.dbase.list_data(self.table, fields, db_id)[0]
    runfolder = data["runfolder"]
    finfolder = data["pathfolder"] + "/" + runfolder
    runcard = data["runcard"]
    jobids = data["jobid"].split()
    util.spCall(["mkdir", "-p", finfolder])
    logger.info("Retrieving ARC output into " + finfolder)
    try:
        # Retrieve ARC standard output for every job of this run
        for jobid in jobids:
            logger.info(jobid)
            cmd = [self.cmd_get, "-j", arcbase, jobid.strip()]
            output = util.getOutputCall(cmd)
            outputfol = output.split("Results stored at: ")[1].rstrip()
            outputfolder = outputfol.split("\n")[0]
            if outputfolder == "" or (len(outputfolder.split(" ")) > 1):
                logger.info("Running mv and rm command is not safe here")
                logger.info("Found blank spaces in the output folder")
                logger.info("Nothing will be moved to the warmup global folder")
            else:
                destination = finfolder + "/" + "arc_out_" + runcard \
                    + outputfolder
                util.spCall(["mv", outputfolder, destination])
                # util.spCall(["rm", "-rf", outputfolder])
    except Exception:
        logger.info("Couldn't find job output in the ARC server")
        logger.info("jobid: " + jobid)
        logger.info("Run arcstat to check the state of the job")
        logger.info("Trying to retrieve data from grid storage anyway")

    # Retrieve warmup from the grid storage warmup folder
    wname = self.warmup_name(runcard, runfolder)
    self.gridw.bring(wname, grid_warmup_dir, finfolder + "/" + wname)
def _bring_warmup_files(self, runcard, rname, shell=False, check_only=False,
                        multichannel=False):
    """ Download the warmup file for a run to the local directory,
    extract the Vegas grid and log file, and return a list with their names.

    The check_only flag doesn't error out if the warmup doesn't exist;
    instead it just returns an empty list for later use
    [intended for checkwarmup mode so multiple warmups can be checked
    consecutively].
    """
    from pyHepGrid.src.header import grid_warmup_dir, logger
    gridFiles = []
    suppress_errors = False
    if check_only:
        suppress_errors = True

    # First bring the warmup .tar.gz
    outnm = self.warmup_name(runcard, rname)
    logger.debug("Warmup GFAL name: {0}".format(outnm))
    tmpnm = "tmp.tar.gz"
    logger.debug("local tmp tar name: {0}".format(tmpnm))
    success = self.gridw.bring(outnm, grid_warmup_dir, tmpnm, shell=shell,
                               suppress_errors=suppress_errors)
    success = self.__check_pulled_warmup(success, tmpnm, warmup_extensions)

    if not success and not check_only:
        if self._press_yes_to_continue(
                "Grid files failed to copy. Try backups from individual "
                "sockets?") == 0:
            backup_dir = os.path.join(grid_warmup_dir,
                                      outnm.replace(".tar.gz", ""))
            backups = self.gridw.get_dir_contents(backup_dir)
            if len(backups) == 0:
                logger.critical("No backups found. Did the warmup complete "
                                "successfully?")
            else:
                backup_files = backups.split()
                for idx, backup in enumerate(backup_files):
                    logger.info("Attempting backup {1} [{0}]".format(
                        idx + 1, backup))
                    success = self.gridw.bring(backup, backup_dir, tmpnm,
                                               shell=shell, force=True)
                    success = self.__check_pulled_warmup(
                        success, tmpnm, warmup_extensions)
                    if success:
                        break

    if not success:
        logger.critical("Grid files failed to copy. Did the warmup complete "
                        "successfully?")
    else:
        logger.info("Grid files copied ok.")

    # Now extract only the Vegas grid files and log file
    gridp = list(warmup_extensions)
    gridp += [i + "_channel" for i in gridp]
    extractFiles = self.tarw.extract_extensions(
        tmpnm, gridp + [".log", ".txt", "channels"])
    try:
        gridFiles = [i for i in extractFiles if ".log" not in i]
        logfile = [i for i in extractFiles if ".log" in i][0]
    except IndexError:
        if not check_only:
            logger.critical("Logfile not found. Did the warmup complete "
                            "successfully?")
        else:
            return []

    if multichannel and len([i for i in gridFiles if "channels" in i]) == 0:
        logger.critical("No multichannel warmup found, but multichannel is "
                        "set in the runcard.")
    elif multichannel:
        logger.info("Multichannel warmup files found.")

    if gridFiles == [] and not check_only:  # No grid files found
        logger.critical("Grid files not found in warmup tarfile. "
                        "Did the warmup complete successfully?")
    elif gridFiles == []:
        return []

    # Tag log file as -warmup
    newlog = logfile + "-warmup"
    os.rename(logfile, newlog)
    # Remove temporary tar files
    os.remove(tmpnm)
    gridFiles.append(newlog)

    # Make sure access to the file is correct!
    for i in gridFiles:
        util.spCall(["chmod", "a+wrx", i])
    return gridFiles
import importlib
import pyHepGrid.src.dbapi
from pyHepGrid.src.header import logger
import pyHepGrid.src.utilities as util
import pyHepGrid.src.header as header
import pyHepGrid.src.runmodes
import multiprocessing as mp
import sys

counter = None

if header.runmode.upper() in pyHepGrid.src.runmodes.mode_selector:
    _mode = pyHepGrid.src.runmodes.mode_selector[header.runmode.upper()]
else:
    package, module = header.runmode.rsplit('.', 1)
    _mode = getattr(importlib.import_module(package), module)
    logger.info(f"Overriding run mode to {_mode}")


def init_counter(args):
    global counter
    counter = args


class Backend(_mode):
    """ Abstract class for common functions needed by all backends,
    to be inherited by their subclasses.
    """
    cDONE = 1
    cWAIT = 0
    cFAIL = -1
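# Hedged sketch of how the module-level `counter` and `init_counter` above are
# typically wired into a multiprocessing pool; the helper and variable names
# below are illustrative, not the actual pyHepGrid API:
#
#   counter = mp.Value('i', 0)
#   pool = mp.Pool(n_threads, initializer=init_counter, initargs=(counter,))
#   results = pool.map(worker_function, list_of_files)
#
# Each worker process then sees the shared counter as a module global, which
# is what _do_get_data relies on when it does `counter.value += 1`.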
def get_local_dir_name(self, runcard, tag):
    # Suitable defaults
    runname = "{0}-{1}".format(runcard, tag)
    dir_name = os.path.join(local_run_directory, runname)
    logger.info("Run directory: {0}".format(dir_name))
    return dir_name
def get_local_dir_name(self, runcard, tag):
    # Suitable defaults
    runname = f"{runcard}-{tag}"
    dir_name = os.path.join(local_run_directory, runname)
    logger.info(f"Run directory: {dir_name}")
    return dir_name
def init_production(self, provided_warmup=None, continue_warmup=False,
                    local=False):
    """ Initialises a production run. If a warmup file is provided, the
    retrieval step is skipped.
    Steps are:
        0 - Retrieve warmup from the grid/local
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage
    """
    import tempfile
    from pyHepGrid.src.header import runcardDir as runFol
    from pyHepGrid.src.header import executable_exe, executable_src_dir, \
        grid_input_dir

    if local:
        self.init_local_production(provided_warmup=provided_warmup)
        return

    runFolders, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir, executable_exe)
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()

    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)

    if provided_warmup:
        warmup_base = provided_warmup
    elif header.provided_warmup_dir:
        warmup_base = header.provided_warmup_dir
    else:
        logger.critical("Retrieving warmup file from grid: Not implemented")

    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))

    if not os.path.isfile(path_to_exe_full):
        logger.critical(
            "Could not find executable at {0}".format(path_to_exe_full))
    else:
        tar_name = os.path.basename(header.grid_executable)
        grid_exe_dir = os.path.dirname(header.grid_executable)
        exe_name = header.executable_exe
        os.system("cp -r " + path_to_exe_full + " " + exe_name)
        upload_exe = True
        if self.gridw.checkForThis(tar_name, grid_exe_dir):
            if not self._press_yes_to_continue(
                    "Old executable found. Do you want to remove it?",
                    fallback=1):
                logger.info("Removing old version of {0} from Grid "
                            "Storage".format(tar_name))
                self.gridw.delete(tar_name, grid_exe_dir)
            else:
                upload_exe = False
        if upload_exe:
            self.tarw.tarFiles([exe_name], tar_name)
            self.gridw.send(tar_name, grid_exe_dir)

    for idx, runName in enumerate(runFolders):
        local = False
        tarfile = runName + ".tar.gz"
        # base_folder = runName.split("-")[0]
        # logger.info(
        #     "Initialising {0} to {1} [{2}/{3}]".format(
        #         runName, tarfile, idx + 1, len(runFolders)))
        # # runcards
        # run_dir = os.path.join(runFol, base_folder)
        # runFiles = dCards[runName].split("+")
        # for f in runFiles:
        #     f = os.path.join(run_dir, f)
        #     self._file_exists(f, logger)
        #     os.system("cp -r " + f + " " + tmpdir)
        # # warmup files
        # for f in self._WARMUP_FILES:
        #     f = os.path.join(warmup_base, base_folder, f)
        #     self._file_exists(f, logger)
        #     os.system("cp -r " + f + " " + tmpdir)

        logger.info("Sending JUNE code to Grid storage.")
        upload_data = True
        if self.gridw.checkForThis(tarfile, grid_input_dir):
            if not self._press_yes_to_continue(
                    "Old data found. Do you want to remove it?", fallback=1):
                logger.info("Removing old version of {0} from Grid "
                            "Storage".format(tarfile))
                self.gridw.delete(tarfile, grid_input_dir)
            else:
                upload_data = False
        if upload_data:
            # tar up & send to grid storage
            # self.tarw.tarFiles(self._WARMUP_FILES + runFiles, tarfile)
            if self.gridw.checkForThis(tarfile, grid_input_dir):
                logger.info("Removing old version of {0} from Grid "
                            "Storage".format(tarfile))
                self.gridw.delete(tarfile, grid_input_dir)
            logger.info("Sending {0} to {1}".format(tarfile, grid_input_dir))
            os.system("gfal-copy {0} {1}".format(
                origdir + "/" + tarfile, header.gfaldir + grid_input_dir))
            # self.gridw.send(tarfile, grid_input_dir)

        if hasattr(header, 'world_list'):
            world_list = header.world_list
            for world in world_list:
                upload_world = True
                world_file = "{0}.hdf5".format(world)
                world_path = grid_input_dir + "/worlds"
                if self.gridw.checkForThis(world_file, world_path):
                    if not self._press_yes_to_continue(
                            "Old {0} found. Do you want to remove it?".format(
                                world_file), fallback=1):
                        logger.info("Removing old version of {0} from Grid "
                                    "Storage".format(world_file))
                        self.gridw.delete(world_file, world_path)
                    else:
                        upload_world = False
                if upload_world:
                    print("Sending {0} to Grid Storage.".format(world_file))
                    if self.gridw.checkForThis(world_file, world_path):
                        logger.info("Removing old version of {0} from Grid "
                                    "Storage".format(world_file))
                        self.gridw.delete(world_file, world_path)
                    logger.info("Sending {0} to {1}".format(
                        world_file, world_path))
                    os.system("gfal-copy {0} {1}".format(
                        origdir + "/" + world_file,
                        header.gfaldir + world_path))

        if hasattr(header, 'latin_hypercube'):
            lhs_file = header.latin_hypercube
            upload_lhs = True
            lhs_file = "{0}.npy".format(lhs_file)
            if self.gridw.checkForThis(lhs_file, grid_input_dir):
                if not self._press_yes_to_continue(
                        "Old {0} found. Do you want to remove it?".format(
                            lhs_file), fallback=1):
                    logger.info("Removing old version of {0} from Grid "
                                "Storage".format(lhs_file))
                    self.gridw.delete(lhs_file, grid_input_dir)
                else:
                    upload_lhs = False
            if upload_lhs:
                print("Sending {0} to Grid Storage.".format(lhs_file))
                if self.gridw.checkForThis(lhs_file, grid_input_dir):
                    logger.info("Removing old version of {0} from Grid "
                                "Storage".format(lhs_file))
                    self.gridw.delete(lhs_file, grid_input_dir)
                logger.info("Sending {0} to {1}".format(
                    lhs_file, grid_input_dir))
                os.system("gfal-copy {0} {1}".format(
                    origdir + "/" + lhs_file,
                    header.gfaldir + grid_input_dir))

    # clean up afterwards
    os.chdir(origdir)
    os.system("rm -r " + tmpdir)
def _get_local_warmup_name(self, matchname, provided_warmup):
    from shutil import copy
    exclude_patterns = [".txt", ".log"]
    matchname_case = matchname
    matchname = matchname.lower()
    if os.path.isdir(provided_warmup):
        matches = []
        potential_files = os.listdir(provided_warmup)
        for potfile in potential_files:
            if potfile.lower().startswith(matchname) and \
                    not any(potfile.endswith(p) for p in exclude_patterns):
                matches.append(potfile)
        if len(matches) > 1:
            logger.critical("Multiple warmup matches found in {1}: {0}".format(
                " ".join(matches), provided_warmup))
        elif len(matches) == 0:
            logger.info("No warmup matches found in {0}; "
                        "looking for files with similar names to {1}:".format(
                            provided_warmup, matchname_case))
            match = None
            potmatches = sorted(
                potential_files,
                key=lambda x: SequenceMatcher(None, x, matchname_case).ratio(),
                reverse=True)
            print(potmatches)
            if matchname_case.split(".")[-1] in warmup_extensions:
                potmatches = [
                    p for p in potmatches if p.lower().endswith(
                        "." + matchname_case.split(".")[-1].lower())]
            for pot in [p for p in potmatches
                        if (SequenceMatcher(None, p, matchname_case).ratio() > 0.5
                            and not any(p.endswith(ext)
                                        for ext in exclude_patterns))]:
                yn = input("Partial warmup match found: do you want to use "
                           "{0}? (y/n) ".format(pot)).lower()
                if yn.startswith("y"):
                    match = os.path.join(provided_warmup, pot)
                    break
            if not match:
                logger.info("No warmup matches found in directory {0}. "
                            "Searching subdirectories:".format(provided_warmup))
                potmatches = []
                for path, _subdirs, files in os.walk(provided_warmup):
                    for name in files:
                        if SequenceMatcher(None, name,
                                           matchname_case).ratio() > 0.5:
                            potmatches.append([path, name])
                if matchname_case.split(".")[-1] in warmup_extensions:
                    potmatches = [
                        p for p in potmatches if p[1].lower().endswith(
                            "." + matchname_case.split(".")[-1].lower())]
                potmatches = sorted(
                    potmatches,
                    key=lambda x: SequenceMatcher(None, x[1],
                                                  matchname_case).ratio())
                potmatches = [os.path.join(p[0], p[1]) for p in potmatches]
                for pot in [p for p in potmatches
                            if (SequenceMatcher(None, p, matchname).ratio() > 0.5
                                and not any(p.endswith(ext)
                                            for ext in exclude_patterns))]:
                    yn = input("Partial warmup match found: do you want to "
                               "use {0}? (y/n) ".format(pot)).lower()
                    if yn.startswith("y"):
                        match = os.path.join(provided_warmup, pot)
                        break
                if not match:
                    logger.critical("No partial matches found; best match "
                                    "was {0}.".format(potmatches[0]))
        else:
            match = os.path.join(provided_warmup, matches[0])
    else:
        match = provided_warmup
    logger.info("Using warmup {0}".format(match))

    if SequenceMatcher(None, os.path.basename(match),
                       matchname_case).ratio() <= 1:
        if "." + matchname_case.split(".")[-1] in warmup_extensions:
            copy(match, matchname_case)
            print(matchname_case)
            match = matchname_case
        else:
            logger.info("Unable to predict expected NNLOJET warmup file name.")
            suff = input("Expect filename to begin {0}, with unknown "
                         "extension. Please enter expected file extension, "
                         "or 'n' to provide your own filename.".format(
                             matchname_case))
            if suff in warmup_extensions:
                copy(match, matchname_case + "." + suff)
                match = matchname_case + "." + suff
            else:
                fn = input("Please enter the filename NNLOJET will expect "
                           "the warmup to have:")
                copy(match, fn)
                match = fn
        local_match = False
    else:
        print(SequenceMatcher(None, os.path.basename(match),
                              matchname_case).ratio())
        print(sys.path[0])
        if match not in os.listdir(sys.path[0]):
            local_match = False
            copy(match, os.path.basename(match))
            match = os.path.basename(match)
        else:
            local_match = True
    return match, local_match