def init_local_warmups(self, provided_warmup=None, continue_warmup=False, local=False):
    """Initialise a local warmup for every runcard selected in runcard.py.

    Delegates the per-runcard work to ``init_single_local_warmup``; the
    ``local`` flag is accepted for interface parity with other backends.
    """
    runcard_names, runcard_info = util.expandCard()
    for name in runcard_names:
        self.init_single_local_warmup(
            name, runcard_info[name],
            provided_warmup=provided_warmup,
            continue_warmup=continue_warmup)
def run_test(args, runcard):
    """Run the backend's test wrapper locally for each selected runcard.

    Selects the appropriate ``testWrapper`` according to the backend flag
    set on ``args``, expands the runcard, then executes the runfile with
    the wrapper-generated arguments.
    """
    # Resolve the backend-specific test wrapper; the flags are mutually
    # exclusive, checked in a fixed priority order.
    if args.runArc:
        from pyHepGrid.src.runArcjob import testWrapper
    elif args.runArcProduction:
        from pyHepGrid.src.runArcjob import testWrapperProduction as testWrapper
    elif args.runDirac:
        from pyHepGrid.src.runDiracjob import testWrapper
    elif args.runSlurm:
        from pyHepGrid.src.runSlurmjob import testWrapper
    elif args.runSlurmProduction:
        from pyHepGrid.src.runSlurmjob import testWrapperProduction \
            as testWrapper
    else:
        raise Exception("Choose what you want to test -(A/B/D/E/F)")

    rncards, dCards = util.expandCard(runcard)
    setup()
    for card in rncards:
        # Strip the quoting the wrapper adds so the tokens can be passed
        # straight to the executable.
        wrapper_args = testWrapper(card, dCards).replace("\"", "").split()
        runfile = os.path.basename(header.runfile)
        util.spCall(["chmod", "+x", runfile])
        util.spCall(["./{0}".format(runfile)] + wrapper_args)
def init_production(self, provided_warmup=None, continue_warmup=False, local=True):
    """ Initialize production (single mode) """
    # This runner only supports local execution.
    if not local:
        raise NotImplementedError(
            "N3FIT is only implemented for local running")
    _, expanded_card = util.expandCard()
    for runcard, runfolder in expanded_card.items():
        base_dir = self.get_local_dir_name(runcard, runfolder)
        # Make sure the run directory and its stdout subfolder exist.
        stdout_dir = f"{base_dir}/stdout"
        if not util.checkIfThere(stdout_dir):
            os.makedirs(stdout_dir)
        # Pre-create the replica folder so later runs don't clash.
        replica_dir = self.replica_folder(base_dir, runname=runfolder)
        if not util.checkIfThere(replica_dir):
            os.makedirs(replica_dir)
        # Drop a copy of the runcard into the run directory.
        from pyHepGrid.src.header import runcardDir
        from shutil import copy
        copy(f"{runcardDir}/{runcard}", base_dir)
        logger.info("run initialized")
def run_wrap_production(self, test=False): """ Wrapper function. It assumes the initialisation stage has already happend Writes sbatch file with the appropiate information and sends producrun # of jobs to the queue """ # runcard names (of the form foo.run) # dCards, dictionary of { 'runcard' : 'name' } # can also include extra info rncards, dCards = util.expandCard() if test: from pyHepGrid.src.header import test_queue as queue else: from pyHepGrid.src.header import production_queue as queue job_type = "Production" self.runfolder = header.runcardDir from pyHepGrid.src.header import producRun, baseSeed, production_threads # loop over all .run files defined in runcard.py header.logger.info("Runcards selected: {0}".format(" ".join(rncards))) for r in rncards: self.check_for_existing_output_local(r, dCards[r], baseSeed, producRun) # Generate the SLURM file arguments = self._get_production_args(r, dCards[r], baseSeed, producRun, production_threads, array=True, queue=queue) slurmfile = self._write_SLURM(arguments, self.prodtempl) header.logger.debug("Path of slurm file: {0}".format(slurmfile)) jobids = [] jobid, runqueue = self._run_SLURM(slurmfile, arguments, queue, test=test) jobids.append(jobid) # Create database entry dataDict = { 'jobid': ' '.join(jobids), 'date': str(datetime.now()), 'pathfolder': arguments["runcard_dir"], 'runcard': r, 'runfolder': dCards[r], 'jobtype': job_type, 'queue': str(runqueue), 'iseed': str(baseSeed), 'no_runs': str(producRun), 'status': "active", } if len(jobids) > 0: self.dbase.insert_data(self.table, dataDict) else: header.logger.critical( "No jobids returned, no database entry inserted for " F"submission: {r} {dCards[r]}")
def run_wrap_production(self, test=False):
    """ Wrapper function. It assumes the initialisation stage has already
    happened.
    Writes a JDL file with the appropriate information and sends producRun
    jobs to the Dirac management system, in batches of ``increment`` seeds.

    test: not supported for Dirac; raises NotImplementedError.
    """
    if test:
        raise NotImplementedError("'Test' mode unavailable for Dirac.")
    rncards, dCards = util.expandCard()
    header.logger.info("Runcards selected: {0}".format(" ".join(
        r for r in rncards)))
    self.runfolder = header.runcardDir
    from pyHepGrid.src.header import baseSeed, producRun, jobName
    # Dirac parametric submissions are chunked in batches of this many seeds.
    increment = 750
    for r in rncards:
        header.logger.info(
            "> Submitting {0} job(s) for {1} to Dirac".format(
                producRun, r))
        header.logger.info(
            "> Beginning at seed {0} in batches of {1}.".format(
                baseSeed, increment))
        self.check_for_existing_output(r, dCards[r])
        jdlfile = None
        # "%s" acts as a placeholder for the seed, filled per batch by the
        # JDL parametric machinery.
        args = self._get_prod_args(r, dCards[r], "%s")
        joblist, remaining_seeds, seed_start = [], producRun, baseSeed
        while remaining_seeds > 0:
            no_seeds = min(increment, remaining_seeds)
            jdlfile = self._write_JDL(args, seed_start, no_seeds, jobName)
            max_seed = seed_start + no_seeds - 1
            header.logger.info(
                " > jdl file path for seeds {0}-{1}: {2}".format(
                    seed_start, max_seed, jdlfile))
            joblist += self._run_JDL(jdlfile)
            remaining_seeds = remaining_seeds - no_seeds
            seed_start = seed_start + no_seeds
        # Create daily path
        pathfolder = util.generatePath(False)
        # Create database entry
        jobStr = ' '.join(joblist)
        dataDict = {
            'jobid': jobStr,
            'date': str(datetime.now()),
            'pathfolder': pathfolder,
            'runcard': r,
            'runfolder': dCards[r],
            'iseed': str(baseSeed),
            'no_runs': str(producRun),
            'jobtype': "Production",
            'status': "active",
        }
        self.dbase.insert_data(self.table, dataDict)
def check_warmup_files(self, db_id, rcard, resubmit=False):
    """ Provides stats on whether a warmup file exists for a given run and
    optionally resubmit if absent.

    db_id: database id of the job to check.
    rcard: runcard module/path forwarded to runWrapper on resubmission.
    resubmit: if True and the warmup is absent (and the job is no longer
        running), resubmit the single run to ARC.

    Fix: the temporary scratch directory used to inspect the warmup tarball
    was never removed and the working directory was not restored when an
    unexpected exception escaped — both are now handled in a finally block.
    """
    import tempfile
    import tarfile
    import shutil
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()
    try:
        os.chdir(tmpdir)
        logger.debug("Temporary directory: {0}".format(tmpdir))
        # Result unused here, but kept: expandCard parses runcard.py and
        # may validate/log as a side effect — TODO confirm it is needed.
        rncards, dCards = util.expandCard()
        tags = ["runcard", "runfolder"]
        runcard_info = self.dbase.list_data(self.table, tags, db_id)[0]
        runcard = runcard_info["runcard"]
        rname = runcard_info["runfolder"]
        try:
            warmup_files = self._bring_warmup_files(runcard, rname,
                                                    check_only=True)
            if warmup_files == []:
                status = "\033[93mMissing\033[0m"
            else:
                status = "\033[92mPresent\033[0m"
        except tarfile.ReadError:
            # Tarball exists but cannot be read.
            status = "\033[91mCorrupted\033[0m"
        run_id = "{0}-{1}:".format(runcard, rname)
        logger.info("[{0}] {2:55} {1:>20}".format(db_id, status, run_id))
    finally:
        # Always restore cwd and clean the scratch area, even on error.
        os.chdir(origdir)
        shutil.rmtree(tmpdir, ignore_errors=True)
    if resubmit and "Present" not in status:
        done, wait, run, fail, unk = self.stats_job(db_id, do_print=False)
        if run + wait > 0:
            # Be more careful in case of unknown status
            logger.warning("Job still running. Not resubmitting for now")
        else:
            # Need to override dictCard for single job submission
            expandedCard = ([runcard], {runcard: rname})
            logger.info(
                "Warmup not found and job ended. Resubmitting to ARC")
            from pyHepGrid.src.runArcjob import runWrapper
            runWrapper(rcard, expandedCard=expandedCard)
def init_local_production(self, provided_warmup=None, local=False):
    """Initialise a local production run for each selected runcard.

    Delegates the per-runcard work to ``init_single_local_production``.
    """
    names, cards = util.expandCard()
    for card in names:
        self.init_single_local_production(card, cards[card],
                                          provided_warmup=provided_warmup)
def run_wrap_warmup(self, test=False, expandedCard=None):
    """ Wrapper function. It assumes the initialisation stage has already
    happened.
    Writes an XRSL file with the appropriate information and sends one
    single job (or n_sockets jobs) to the queue.

    ExpandedCard is an override for util.expandCard for use in
    auto-resubmission.
    """
    from pyHepGrid.src.header import warmupthr, jobName, warmup_base_dir
    # runcard names (of the form foo.run)
    # dCards, dictionary of { 'runcard' : 'name' }, can include extra info
    if expandedCard is None:
        rncards, dCards = util.expandCard()
    else:
        rncards, dCards = expandedCard
    if header.sockets_active > 1:
        sockets = True
        n_sockets = header.sockets_active
        # Socketed warmups are only supported at Durham CEs.
        if ".dur.scotgrid.ac.uk" not in header.ce_base:
            header.logger.critical("Can't submit socketed warmups "
                                   "to locations other than Durham")
            header.logger.info(
                f"Current submission computing element: {header.ce_base}")
    else:
        sockets = False
        n_sockets = 1
    if test:
        job_type = "Warmup Test"
    else:
        job_type = "Warmup"
    # Sanity checks for test queue
    if test and warmupthr > 2:
        self._press_yes_to_continue(
            " \033[93m WARNING:\033[0m About to submit job(s) to the test"
            " queue with {0} threads each.".format(warmupthr))
    if test and n_sockets > 2:
        self._press_yes_to_continue(
            " \033[93m WARNING:\033[0m About to submit job(s) to the test"
            " queue with {0} sockets each.".format(n_sockets))
    self.runfolder = header.runcardDir
    # loop over all .run files defined in runcard.py
    header.logger.info("Runcards selected: {0}".format(" ".join(
        r for r in rncards)))
    port = header.port
    for r in rncards:
        if n_sockets > 1:
            # Automagically activates the socket and finds the best port
            # for it!
            port = sapi.fire_up_socket_server(
                header.server_host, port, n_sockets,
                header.wait_time, header.socket_exe,
                tag="{0}-{1}".format(r, dCards[r]))
            job_type = "Socket={}".format(port)
        # Check whether this run has something on the gridStorage
        self.check_for_existing_warmup(r, dCards[r])
        # Generate the XRSL file
        arguments = self._get_warmup_args(r, dCards[r], threads=warmupthr,
                                          sockets=sockets, port=port)
        dictData = {
            'arguments': arguments,
            'jobName': jobName,
            'count': str(warmupthr),
            'countpernode': str(warmupthr),
        }
        xrslfile = self._write_XRSL(dictData, filename=None)
        header.logger.debug(" > Path of xrsl file: {0}".format(xrslfile))
        jobids = []
        keyquit = None
        try:
            for socket in range(n_sockets):
                # Run the file; a non-zero return code is recorded as a
                # "None" jobid so the DB entry still lines up per socket.
                jobid, retcode = self._run_XRSL(xrslfile,
                                               self._get_ce(test, socket))
                if int(retcode) != 0:
                    jobid = "None"
                jobids.append(jobid)
        except Exception as interrupt:
            print("\n")
            header.logger.error(
                "Submission error encountered. Inserting all successful "
                "submissions to database")
            keyquit = interrupt
        finally:
            # Create daily path
            if warmup_base_dir is not None:
                pathfolder = util.generatePath(warmup=True)
            else:
                pathfolder = "None"
            # Create database entry (even on error, so successful
            # submissions are not lost).
            dataDict = {
                'jobid': ' '.join(jobids),
                'date': str(datetime.now()),
                'pathfolder': pathfolder,
                'runcard': r,
                'runfolder': dCards[r],
                'jobtype': job_type,
                'status': "active",
            }
            if len(jobids) > 0:
                self.dbase.insert_data(self.table, dataDict)
            else:
                header.logger.critical(
                    "No jobids returned, no database entry inserted for "
                    "submission: {0} {1}".format(r, dCards[r]))
            port += 1
        # Re-raise the submission error after the DB entry is recorded.
        if keyquit is not None:
            raise keyquit
def run_wrap_production(self, test=False):
    """ Wrapper function. It assumes the initialisation stage has already
    happened.
    Writes an XRSL file with the appropriate information and sends a
    producRun number of jobs to the ARC queue, submitting in parallel.

    test: if True, submit to the test queue (with a sanity prompt for
        large submissions).
    """
    from pyHepGrid.src.header import baseSeed, producRun, jobName
    # runcard names (keys)
    # dCards, dictionary of { 'runcard' : 'name' }
    rncards, dCards = util.expandCard()
    self.runfolder = header.runcardDir
    job_type = "Production"
    header.logger.info("Runcards selected: {0}".format(" ".join(
        r for r in rncards)))
    for r in rncards:
        joblist = []
        # Check whether this run has something on the gridStorage
        self.check_for_existing_output(r, dCards[r])
        # use the same unique name for all seeds since
        # we cannot multiprocess the arc submission
        keyquit = None
        # Sanity check for test queue
        if test and producRun > 5:
            self._press_yes_to_continue(
                " \033[93m WARNING:\033[0m About to submit a large "
                "number ({0}) of jobs to the test queue.".format(
                    producRun))
        # use iterator for memory reasons :)
        from multiprocessing import Manager
        # Use shared memory list in case of submission failure, so jobids
        # collected by worker threads survive an interrupt.
        jobids = Manager().list()
        arg_sets = self._arg_iterator(r, dCards, jobName, baseSeed,
                                      producRun, test, jobids)
        try:
            joblist = self._multirun(self._run_single_production,
                                     arg_sets,
                                     n_threads=min(
                                         header.arc_submit_threads,
                                         producRun))
        except (Exception, KeyboardInterrupt) as interrupt:
            print("\n")
            # Fall back to the shared list of jobids gathered so far.
            joblist = jobids
            header.logger.error(
                "Submission error encountered. "
                "Inserting all successful submissions to database")
            keyquit = interrupt
        # Create daily path
        pathfolder = util.generatePath(warmup=False)
        # Create database entry
        jobStr = ' '.join(joblist)
        dataDict = {
            'jobid': jobStr,
            'date': str(datetime.now()),
            'pathfolder': pathfolder,
            'runcard': r,
            'jobtype': job_type,
            'runfolder': dCards[r],
            'iseed': str(baseSeed),
            'no_runs': str(producRun),
            'status': "active",
        }
        if len(joblist) > 0:
            self.dbase.insert_data(self.table, dataDict)
            # Set jobs to failed status if no jobid returned
            dbid = self.get_active_dbids()[-1]
            statuses = [
                self.cUNK if i != "None" else self.cMISS for i in joblist
            ]
            self._set_new_status(dbid, statuses)
        else:
            header.logger.critical(
                "No jobids returned, no database entry inserted for "
                "submission: {0} {1}".format(r, dCards[r]))
        # Re-raise the submission error after the DB entry is recorded.
        if keyquit is not None:
            raise keyquit
def run_wrap_production(self, test=None):
    """ Wrapper function. It assumes the initialisation stage has already
    happened.
    Writes an XRSL file with the appropriate information and sends a
    producRun number of jobs (one per seed, sequentially) to the ARC
    queue.

    test: if truthy, submit to the test queue (with a sanity prompt for
        large submissions).
    """
    from pyHepGrid.src.header import baseSeed, producRun, jobName, lhapdf_grid_loc, lhapdf_loc, executable_exe
    # runcard names (keys)
    # dCards, dictionary of { 'runcard' : 'name' }
    rncards, dCards = util.expandCard()
    self.runfolder = header.runcardDir
    job_type = "Production"
    header.logger.info("Runcards selected: {0}".format(" ".join(
        r for r in rncards)))
    for r in rncards:
        joblist = []
        # Check whether this run has something on the gridStorage
        self.check_for_existing_output(r, dCards[r])
        # use the same unique name for all seeds since
        # we cannot multiprocess the arc submission
        xrslfile = None
        keyquit = None
        # Sanity check for test queue
        if test and producRun > 5:
            self._press_yes_to_continue(
                " \033[93m WARNING:\033[0m About to submit a large number ({0}) of jobs to the test queue."
                .format(producRun))
        try:
            for seed in range(baseSeed, baseSeed + producRun):
                arguments = self._get_prod_args(r, dCards[r], seed)
                dictData = {
                    'arguments': arguments,
                    'jobName': jobName,
                    'count': str(1),
                    'countpernode': str(1),
                }
                # Reuse the same xrsl file for every seed (filename is
                # fixed after the first write).
                xrslfile = self._write_XRSL(dictData, filename=xrslfile)
                if (seed == baseSeed):
                    header.logger.debug(
                        " > Path of xrsl file: {0}".format(xrslfile))
                # Run the file
                jobid = self._run_XRSL(xrslfile, test=test)
                joblist.append(jobid)
        except Exception as interrupt:
            print("\n")
            header.logger.error(
                "Submission error encountered. "
                "Inserting all successful submissions to database")
            keyquit = interrupt
        # Create daily path
        pathfolder = util.generatePath(warmup=False)
        # Create database entry
        jobStr = ' '.join(joblist)
        dataDict = {
            'jobid': jobStr,
            'date': str(datetime.now()),
            'pathfolder': pathfolder,
            'runcard': r,
            'jobtype': job_type,
            'runfolder': dCards[r],
            'iseed': str(baseSeed),
            'no_runs': str(producRun),
            'status': "active",
        }
        if len(joblist) > 0:
            self.dbase.insert_data(self.table, dataDict)
        else:
            header.logger.critical(
                "No jobids returned, no database entry inserted for submission: {0} {1}"
                .format(r, dCards[r]))
        # Re-raise the submission error after the DB entry is recorded.
        if keyquit is not None:
            raise keyquit
def run_wrap_warmup(self, test=False, expandedCard=None):
    """ Wrapper function. It assumes the initialisation stage has already
    happened.
    Writes an sbatch file with the appropriate information and sends one
    single job (or n_sockets jobs) to the queue.

    ExpandedCard is an override for util.expandCard for use in
    auto-resubmission.
    """
    from pyHepGrid.src.header import warmupthr
    if test:
        from pyHepGrid.src.header import test_queue as queue
    else:
        from pyHepGrid.src.header import warmup_queue as queue
    # runcard names (of the form foo.run)
    # dCards, dictionary of { 'runcard' : 'name' },
    # can also include extra informations
    if expandedCard is None:
        rncards, dCards = util.expandCard()
    else:
        rncards, dCards = expandedCard
    if header.sockets_active > 1:
        sockets = True
        n_sockets = header.sockets_active
    else:
        sockets = False
        n_sockets = 1
    if test:
        job_type = "Warmup Test"
    else:
        job_type = "Warmup"
    self.runfolder = header.runcardDir
    # loop over all .run files defined in runcard.py
    header.logger.info("Runcards selected: {0}".format(" ".join(rncards)))
    port = header.port
    for r in rncards:
        if n_sockets > 1:
            # Automatically activates the socket and finds the best port!
            port = sapi.fire_up_socket_server(
                header.server_host, port, n_sockets, None,
                header.socket_exe,
                tag="{0}-{1}".format(r, dCards[r]),
                tmuxloc=header.tmux_location)
            job_type = "Socket={}".format(port)
        # TODO check if warmup exists? nah
        # Generate the SLURM file; socketed runs are submitted as an array.
        if n_sockets > 1:
            array = True
        else:
            array = False
        arguments = self._get_warmup_args(r, dCards[r],
                                          n_sockets=n_sockets,
                                          threads=warmupthr,
                                          sockets=sockets, port=port,
                                          array=array, queue=queue)
        slurmfile = self._write_SLURM(arguments, self.templ)
        header.logger.debug(" > Path of slurm file: {0}".format(slurmfile))
        jobids = []
        jobid, runqueue = self._run_SLURM(slurmfile, arguments, queue,
                                          test=test, n_sockets=n_sockets)
        jobids.append(jobid)
        # Create database entry
        dataDict = {
            'jobid': ' '.join(jobids),
            'no_runs': str(n_sockets),
            'date': str(datetime.now()),
            'pathfolder': arguments["runcard_dir"],
            'runcard': r,
            'runfolder': dCards[r],
            'jobtype': job_type,
            'queue': str(runqueue),
            'status': "active",
        }
        if len(jobids) > 0:
            self.dbase.insert_data(self.table, dataDict)
        else:
            header.logger.critical(
                "No jobids returned, no database entry inserted for "
                F"submission: {r} {dCards[r]}")
        # Advance the port so the next runcard's socket server does not
        # clash with this one.
        port += 1
def init_production(self, provided_warmup=None, continue_warmup=False,
                    local=False):
    """ Initialises a production run. If a warmup file is provided the
    retrieval step is skipped.
    Steps are:
        0 - Retrieve warmup from the grid/local
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage

    provided_warmup: path to a local warmup file/dir (made absolute before
        changing into the scratch directory).
    local: if True, delegate to init_local_production and return.
    """
    import tempfile
    from pyHepGrid.src.header import runcardDir as runFol
    from pyHepGrid.src.header import executable_exe, executable_src_dir, \
        grid_input_dir
    if local:
        self.init_local_production(provided_warmup=provided_warmup)
        return
    runFolders, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir,
                                          executable_exe)
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()
    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)
    # NOTE(review): if neither provided_warmup nor header.provided_warmup_dir
    # is set, warmup_base is left unbound (only logger.critical is called) —
    # confirm logger.critical aborts execution here.
    if provided_warmup:
        warmup_base = provided_warmup
    elif header.provided_warmup_dir:
        warmup_base = header.provided_warmup_dir
    else:
        logger.critical(
            "Retrieving warmup file from grid: Not implemented")
    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))
    if not os.path.isfile(path_to_exe_full):
        logger.critical(
            "Could not find executable at {0}".format(path_to_exe_full))
    else:
        # Upload (or keep) the tarred executable on grid storage.
        tar_name = os.path.basename(header.grid_executable)
        grid_exe_dir = os.path.dirname(header.grid_executable)
        exe_name = header.executable_exe
        os.system("cp -r " + path_to_exe_full + " " + exe_name)
        upload_exe = True
        if self.gridw.checkForThis(tar_name, grid_exe_dir):
            if not self._press_yes_to_continue(
                    "Old executable found. "
                    "Do you want to remove it?", fallback=1):
                logger.info(
                    F"Removing old version of {tar_name} from Grid Storage"
                )
                self.gridw.delete(tar_name, grid_exe_dir)
            else:
                upload_exe = False
        if upload_exe:
            self.tarw.tarFiles([exe_name], tar_name)
            self.gridw.send(tar_name, grid_exe_dir)
    # Upload one data tarball per run folder.
    for idx, runName in enumerate(runFolders):
        local = False
        tarfile = runName + ".tar.gz"
        # base_folder = runName.split("-")[0]
        # logger.info(
        #     "Initialising {0} to {1} [{2}/{3}]".format(
        #         runName, tarfile, idx + 1, len(runFolders)))
        # # runcards
        # run_dir = os.path.join(runFol, base_folder)
        # runFiles = dCards[runName].split("+")
        # for f in runFiles:
        #     f = os.path.join(run_dir, f)
        #     self._file_exists(f, logger)
        #     os.system("cp -r " + f + " " + tmpdir)
        # # warmup files
        # for f in self._WARMUP_FILES:
        #     f = os.path.join(warmup_base, base_folder, f)
        #     self._file_exists(f, logger)
        #     os.system("cp -r " + f + " " + tmpdir)
        logger.info("Sending JUNE code to Grid storage.")
        upload_data = True
        if self.gridw.checkForThis(tarfile, grid_input_dir):
            if not self._press_yes_to_continue(
                    "Old data found. "
                    "Do you want to remove it?", fallback=1):
                logger.info(
                    F"Removing old version of {tarfile} from Grid Storage")
                self.gridw.delete(tarfile, grid_input_dir)
            else:
                upload_data = False
        if upload_data:
            # tar up & send to grid storage
            # self.tarw.tarFiles(self._WARMUP_FILES + runFiles, tarfile)
            if self.gridw.checkForThis(tarfile, grid_input_dir):
                logger.info(
                    "Removing old version of {0} from Grid Storage".format(
                        tarfile))
                self.gridw.delete(tarfile, grid_input_dir)
            logger.info("Sending {0} to {1}".format(
                tarfile, grid_input_dir))
            # Tarball is copied from the original working directory, not
            # the scratch dir.
            os.system("gfal-copy {0} {1}".format(
                origdir + "/" + tarfile,
                header.gfaldir + grid_input_dir))
            # self.gridw.send(tarfile, grid_input_dir)
    # Optionally upload world hdf5 files declared in the header.
    if hasattr(header, 'world_list'):
        world_list = header.world_list
        for world in world_list:
            upload_world = True
            world_file = "{0}.hdf5".format(world)
            world_path = grid_input_dir + "/worlds"
            if self.gridw.checkForThis(world_file, world_path):
                if not self._press_yes_to_continue(
                        "Old {0} found. Do you want to remove it?".format(
                            world_file), fallback=1):
                    logger.info(
                        F"Removing old version of {world_file} from Grid Storage"
                    )
                    self.gridw.delete(world_file, world_path)
                else:
                    upload_world = False
            if upload_world:
                print("Sending {0} to Grid Storage.".format(world_file))
                if self.gridw.checkForThis(world_file, world_path):
                    logger.info(
                        "Removing old version of {0} from Grid Storage".
                        format(world_file))
                    self.gridw.delete(world_file, world_path)
                logger.info("Sending {0} to {1}".format(
                    world_file, world_path))
                os.system("gfal-copy {0} {1}".format(
                    origdir + "/" + world_file,
                    header.gfaldir + world_path))
    # Optionally upload the latin hypercube sample file.
    if hasattr(header, 'latin_hypercube'):
        lhs_file = header.latin_hypercube
        upload_lhs = True
        lhs_file = "{0}.npy".format(lhs_file)
        if self.gridw.checkForThis(lhs_file, grid_input_dir):
            if not self._press_yes_to_continue(
                    "Old {0} found. "
                    "Do you want to remove it?".format(lhs_file),
                    fallback=1):
                logger.info(
                    F"Removing old version of {lhs_file} from Grid Storage"
                )
                self.gridw.delete(lhs_file, grid_input_dir)
            else:
                upload_lhs = False
        if upload_lhs:
            print("Sending {0} to Grid Storage.".format(lhs_file))
            if self.gridw.checkForThis(lhs_file, grid_input_dir):
                logger.info(
                    "Removing old version of {0} from Grid Storage".format(
                        lhs_file))
                self.gridw.delete(lhs_file, grid_input_dir)
            logger.info("Sending {0} to {1}".format(
                lhs_file, grid_input_dir))
            os.system("gfal-copy {0} {1}".format(
                origdir + "/" + lhs_file,
                header.gfaldir + grid_input_dir))
    # clean up afterwards
    os.chdir(origdir)
    os.system("rm -r " + tmpdir)
def init_production(self, provided_warmup=None, continue_warmup=False,
                    local=False):
    """ Initialises a production run. If a warmup file is provided the
    retrieval step is skipped.
    Steps are:
        0 - Retrieve warmup from the grid/local
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage

    provided_warmup: path to a local warmup base dir (made absolute before
        changing into the scratch directory).
    local: if True, delegate to init_local_production and return.
    """
    import tempfile
    from pyHepGrid.src.header import runcardDir as runFol
    from pyHepGrid.src.header import executable_exe, executable_src_dir, grid_input_dir
    if local:
        self.init_local_production(provided_warmup=provided_warmup)
        return
    rncards, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir,
                                          executable_exe)
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()
    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)
    # NOTE(review): if neither provided_warmup nor header.provided_warmup_dir
    # is set, warmup_base is left unbound (only logger.critical is called) —
    # confirm logger.critical aborts execution here.
    if provided_warmup:
        warmup_base = provided_warmup
    elif header.provided_warmup_dir:
        warmup_base = header.provided_warmup_dir
    else:
        # print("Retrieving warmup file from grid")
        # warmupFiles = self._bring_warmup_files(i, dCards[i], shell=True)
        logger.critical("Retrieving warmup file from grid: Not implemented")
    # setup LHAPDF
    if header.use_cvmfs_lhapdf:
        os.environ['LHAPDF_DATA_PATH'] = header.cvmfs_lhapdf_location
    # create Process dir in Sherpa
    self._init_Sherpa(warmup_base, rncards)
    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))
    # if not os.path.isfile(path_to_exe_full):
    #     logger.critical("Could not find executable at {0}".format(path_to_exe_full))
    # copy(path_to_exe_full, os.getcwd())
    # files = [executable_exe]
    for idx, i in enumerate(rncards):
        local = False
        # One tarball per runcard: <runcard>+<runname>.tar.gz
        tarfile = i + "+" + dCards[i] + ".tar.gz"
        base_folder = i.split("-")[0] + "/"
        logger.info("Initialising {0} to {1} [{2}/{3}]".format(
            i, tarfile, idx + 1, len(rncards)))
        # runcards
        run_dir = runFol + base_folder
        runFiles = [dCards[i] + ".yml"]
        for f in runFiles:
            os.system("cp -r " + run_dir + f + " " + tmpdir)
        # warmup files (Sherpa process/run/results artefacts)
        warmupFiles = ["Process", "Run.dat", "Results.db"]
        for f in warmupFiles:
            os.system("cp -r " + warmup_base + base_folder + f + " " + tmpdir)
        # tar up & send to grid storage
        self.tarw.tarFiles(warmupFiles + runFiles, tarfile)
        if self.gridw.checkForThis(tarfile, grid_input_dir):
            logger.info("Removing old version of {0} from Grid Storage".format(tarfile))
            self.gridw.delete(tarfile, grid_input_dir)
        logger.info("Sending {0} to {1}".format(tarfile, grid_input_dir))
        self.gridw.send(tarfile, grid_input_dir, shell=True)
    # clean up afterwards
    os.chdir(origdir)
    os.system("rm -r " + tmpdir)
def init_production(self, provided_warmup=None, continue_warmup=False,
                    local=False):
    """ Initialises a production run. If a warmup file is provided the
    retrieval step is skipped.
    Steps are:
        0 - Retrieve warmup from the grid/local
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage

    provided_warmup: path to a local warmup file/dir (made absolute before
        changing into the scratch directory).
    local: if True, delegate to init_local_production and return.
    """
    from shutil import copy
    import tempfile
    from pyHepGrid.src.header import runcardDir as runFol
    from pyHepGrid.src.header import executable_exe, executable_src_dir, logger
    if local:
        self.init_local_production(provided_warmup=provided_warmup)
        return
    rncards, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir,
                                          executable_exe)
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()
    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)
    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))
    if not os.path.isfile(path_to_exe_full):
        logger.critical("Could not find executable at {0}".format(path_to_exe_full))
    copy(path_to_exe_full, os.getcwd())
    files = [executable_exe]
    for idx, i in enumerate(rncards):
        logger.info("Initialising {0} [{1}/{2}]".format(
            i, idx + 1, len(rncards)))
        local = False
        # Check whether warmup/production is active in the runcard
        runcard_file = os.path.join(runFol, i)
        runcard_obj = PROGRAMruncard(runcard_file, logger=logger,
                                     use_cvmfs=header.use_cvmfs_lhapdf,
                                     cvmfs_loc=header.cvmfs_lhapdf_location)
        multichannel = self.check_runcard_multichannel(runcard_obj)
        self._check_production(runcard_obj)
        rname = dCards[i]
        tarfile = i + rname + ".tar.gz"
        copy(os.path.join(runFol, i), os.getcwd())
        # Locate the warmup: explicit path, header-configured dir, or grid.
        if provided_warmup:
            match, local = self._get_local_warmup_name(
                runcard_obj.warmup_filename(), provided_warmup)
            warmupFiles = [match]
        elif header.provided_warmup_dir:
            match, local = self._get_local_warmup_name(
                runcard_obj.warmup_filename(), header.provided_warmup_dir)
            warmupFiles = [match]
        else:
            logger.info("Retrieving warmup file from grid")
            warmupFiles = self._bring_warmup_files(
                i, rname, shell=True, multichannel=multichannel)
        self.tarw.tarFiles(files + [i] + warmupFiles, tarfile)
        if self.gridw.checkForThis(tarfile, header.grid_input_dir):
            logger.info("Removing old version of {0} from Grid Storage".format(tarfile))
            self.gridw.delete(tarfile, header.grid_input_dir)
        logger.info("Sending {0} to GFAL {1}/".format(
            tarfile, header.grid_input_dir))
        self.gridw.send(tarfile, header.grid_input_dir, shell=True)
        # Local warmups stay in place; downloaded ones are removed.
        if local:
            util.spCall(["rm", i, tarfile])
        else:
            util.spCall(["rm", i, tarfile] + warmupFiles)
    os.remove(executable_exe)
    os.chdir(origdir)
def init_warmup(self, provided_warmup=None, continue_warmup=False,
                local=False):
    """ Initialises a warmup run. A warmup file can be provided and it
    will be added to the .tar file sent to the grid storage.
    Steps are:
        1 - tar up executable, runcard and necessary files
        2 - send it to the grid storage

    provided_warmup: path to a local warmup file/dir (made absolute
        before changing into the scratch directory).
    continue_warmup: forwarded to the warmup sanity check.
    local: if True, delegate to init_local_warmups and return.
    """
    from shutil import copy
    import tempfile
    from pyHepGrid.src.header import executable_src_dir, executable_exe, logger
    from pyHepGrid.src.header import runcardDir as runFol
    if local:
        self.init_local_warmups(provided_warmup=provided_warmup,
                                continue_warmup=continue_warmup)
        return
    origdir = os.path.abspath(os.getcwd())
    tmpdir = tempfile.mkdtemp()
    # if provided warmup is a relative path, ensure we have the full path
    # before we change to the tmp directory
    if provided_warmup:
        if provided_warmup[0] != "/":
            provided_warmup = "{0}/{1}".format(origdir, provided_warmup)
    os.chdir(tmpdir)
    logger.debug("Temporary directory: {0}".format(tmpdir))
    rncards, dCards = util.expandCard()
    path_to_exe_full = self._exe_fullpath(executable_src_dir,
                                          executable_exe)
    if not os.path.isfile(path_to_exe_full):
        logger.critical("Could not find executable at {0}".format(path_to_exe_full))
    copy(path_to_exe_full, os.getcwd())
    files = [executable_exe]
    for idx, i in enumerate(rncards):
        logger.info("Initialising {0} [{1}/{2}]".format(
            i, idx + 1, len(rncards)))
        local = False
        # NOTE(review): warmupFiles stays empty — warmups fetched below go
        # into warmup_files/files, so the cleanup loop over warmupFiles at
        # the end never removes them. Confirm whether this is intended.
        warmupFiles = []
        # Check whether warmup/production is active in the runcard
        runcard_file = os.path.join(runFol, i)
        if not os.path.isfile(runcard_file):
            self._press_yes_to_continue(
                "Could not find runcard {0}".format(i),
                error="Could not find runcard")
        runcard_obj = PROGRAMruncard(runcard_file, logger=logger,
                                     use_cvmfs=header.use_cvmfs_lhapdf,
                                     cvmfs_loc=header.cvmfs_lhapdf_location)
        self._check_warmup(runcard_obj, continue_warmup)
        multichannel = self.check_runcard_multichannel(runcard_obj)
        if provided_warmup:
            # Copy warmup to current dir if not already there
            match, local = self._get_local_warmup_name(
                runcard_obj.warmup_filename(), provided_warmup)
            files += [match]
        rname = dCards[i]
        tarfile = i + rname + ".tar.gz"
        copy(os.path.join(runFol, i), os.getcwd())
        if self.overwrite_warmup:
            # Pull any existing warmup from grid storage so the run can
            # continue from it.
            checkname = self.warmup_name(i, rname)
            if self.gridw.checkForThis(checkname, header.grid_warmup_dir):
                logger.info("Warmup found in GFAL:{0}!".format(
                    header.grid_warmup_dir))
                warmup_files = self._bring_warmup_files(
                    i, rname, shell=True, multichannel=multichannel)
                files += warmup_files
                logger.info("Warmup files found: {0}".format(
                    " ".join(i for i in warmup_files)))
        self.tarw.tarFiles(files + [i], tarfile)
        if self.gridw.checkForThis(tarfile, header.grid_input_dir):
            # Could we cache this? Just to speed up ini
            logger.info("Removing old version of {0} from Grid Storage".format(tarfile))
            self.gridw.delete(tarfile, header.grid_input_dir)
        logger.info("Sending {0} to gfal {1}/".format(
            tarfile, header.grid_input_dir))
        self.gridw.send(tarfile, header.grid_input_dir, shell=True)
        if not local:
            for j in warmupFiles:
                os.remove(j)
        os.remove(i)
        os.remove(tarfile)
    os.remove(executable_exe)
    os.chdir(origdir)