import os
import copy
import glob
import shutil
import logging
import datetime

# CP3 slurm_utils ('module load slurm/slurm_utils' on the CP3/CECI clusters)
from CP3SlurmUtils.Configuration import Configuration
from CP3SlurmUtils.SubmitWorker import SubmitWorker

import parameters  # repo-local settings module (partition, QOS, paths, ...), assumed alongside this script

logger = logging.getLogger(__name__)


def ClusterParameterEstimator_4SLURM(yml=None, outputdir=None, task=None, isTest=False):
    config = Configuration()
    config.sbatch_partition = 'cp3'
    config.sbatch_qos = 'cp3'
    config.cmsswDir = os.path.dirname(os.path.abspath(__file__))
    config.sbatch_chdir = os.path.join(config.cmsswDir, outputdir)
    config.sbatch_time = '0-02:00'
    config.sbatch_memPerCPU = '2000'
    config.batchScriptsFilename = "slurmSubmission.sh"
    #config.environmentType = 'cms'

    # Ship only the cmsRun configuration that matches the requested task
    if task == "skim":
        cmsRunCfg = "skimProducer.py"
    elif task == "hitresolution":
        cmsRunCfg = "SiStripHitResol.py"
    else:
        cmsRunCfg = "CPEstimator.py"
    config.inputSandboxContent = [cmsRunCfg]

    config.stageoutFiles = ['*.root']
    config.stageoutDir = config.sbatch_chdir
    config.inputParamsNames = ["inputFiles", "outputFile", "task", "sample"]

    analysisCfgs = os.path.join(config.cmsswDir, yml)
    # getTasks() (defined elsewhere in this module) builds one parameter set per job
    config.inputParams = getTasks(task=task, analysisCfgs=analysisCfgs,
                                  cmsswDir=config.cmsswDir,
                                  stageoutDir=config.stageoutDir, isTest=isTest)
    shutil.copyfile(analysisCfgs, os.path.join(config.stageoutDir, "analysis.yml"))

    config.payload = """
echo ${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}
if [[ "$task" == *"skim"* ]]; then
    cmsRun skimProducer.py inputFiles=${inputFiles} outputFile=${outputFile}
elif [[ "$task" == "hitresolution" ]]; then
    cmsRun SiStripHitResol.py inputFiles=${inputFiles} outputFile=${outputFile}
else
    cmsRun CPEstimator.py inputFiles=${inputFiles} outputFile=${outputFile}
fi
"""

    submitWorker = SubmitWorker(config, submit=True, yes=True, debug=True, quiet=True)
    submitWorker()
    logger.warning(
        'Work still in progress for a better workflow ...\n'
        'To hadd the files and produce plots, run the following once the jobs have finished:\n'
        'python postprocessing.py --workdir {}\n'
        'squeue -u user_name : lets you check the status of your submitted jobs\n'
        .format(outputdir))
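# Illustrative usage sketch (not part of the original module; the YAML name and
# output directory are assumptions). With isTest=True, getTasks() is expected to
# return a reduced job list, so a small test batch can be submitted and checked
# before launching the full production:
def _test_cpe_submission():
    ClusterParameterEstimator_4SLURM(yml='analysis.yml',
                                     outputdir='CPE_test',
                                     task='hitresolution',
                                     isTest=True)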
def submit_on_slurm(name, args, debug=False):
    # Check arguments #
    GPU = args.find("--GPU") != -1
    output = args.find("--output") != -1

    config = Configuration()
    config.sbatch_partition = parameters.partition
    config.sbatch_qos = parameters.QOS
    config.sbatch_chdir = parameters.main_path
    config.sbatch_time = parameters.time
    config.sbatch_additionalOptions = [parameters.additional_options]
    config.sbatch_memPerCPU = parameters.mem
    if parameters.partition == 'cp3-gpu':
        config.sbatch_additionalOptions += ['--export=NONE']
        #if parameters.cpus > 1:
        #    config.sbatch_additionalOptions += ["--cpus-per-gpu={}".format(parameters.cpus)]
        #config.sbatch_additionalOptions += ['--mem-per-gpu={}'.format(parameters.mem)]
    elif parameters.partition == 'gpu':
        config.sbatch_additionalOptions += ['--gres=gpu:TeslaV100:{}'.format(parameters.gpus),
                                            '--export=NONE']
        #config.sbatch_additionalOptions += ['--mem-per-gpu={}'.format(parameters.mem)]
        #if parameters.cpus > 1:
        #    config.sbatch_additionalOptions += ["--cpus-per-gpu={}".format(parameters.cpus)]
        #    config.sbatch_additionalOptions += ["--cpus-per-task={}".format(parameters.cpus)]
        #config.sbatch_additionalOptions += ["-c {}".format(parameters.cpus)]
    else:
        if parameters.tasks > 1:
            # long form: sbatch does not parse the short form '-n=<N>'
            config.sbatch_additionalOptions += ["--ntasks={}".format(parameters.tasks)]
        if parameters.cpus > 1:
            config.sbatch_additionalOptions += ["--cpus-per-task={}".format(parameters.cpus)]

    config.inputSandboxContent = []
    config.useJobArray = True
    config.inputParamsNames = []
    config.inputParams = []
    if output:
        config.inputParamsNames += ["--verbose"]
        config.inputParams += [[""]]
    if not output:
        config.inputParamsNames += ['scan', 'task']
        if parameters.crossvalidation and parameters.split_per_model:
            config.inputParamsNames += ['modelId']

    # Environment setup executed on the worker node before the training script
    config.payload = ""
    if parameters.partition == 'cp3-gpu':
        config.payload += "export PYTHONPATH=/python3/lib/python3.6/site-packages/:$PYTHONPATH\n"  # GPU tf
        config.payload += "export PYTHONPATH=/root6/lib:$PYTHONPATH\n"  # ROOT
        config.payload += "module load cp3\n"  # needed on gpu to load slurm_utils
        config.payload += "module load python/python36_sl7_gcc73\n"
        config.payload += "module load slurm/slurm_utils\n"
    if parameters.partition == 'gpu':
        config.payload += "module load releases/2019b_test\n"
        config.payload += "module load cp3\n"  # needed on gpu to load slurm_utils
        config.payload += "module load root/6.12.04-sl7_gcc73\n"
        config.payload += "module load root_numpy\n"
        config.payload += "module load TensorFlow\n"
        config.payload += "module load slurm/slurm_utils\n"

    # Doubled braces survive the .format(script=...) call below and are expanded
    # by the shell on the worker node
    config.payload += "python3 {script} "
    if not output:
        config.payload += "--scan ${{scan}} --task ${{task}} "
        if parameters.crossvalidation and parameters.split_per_model:
            config.payload += "--modelId ${{modelId}}"
    config.payload += args

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    out_dir = parameters.main_path

    slurm_config = copy.deepcopy(config)
    slurm_working_dir = os.path.join(out_dir, 'slurm', name + '_' + timestamp)
    slurm_config.batchScriptsDir = os.path.join(slurm_working_dir, 'scripts')
    slurm_config.inputSandboxDir = slurm_config.batchScriptsDir
    slurm_config.stageoutDir = os.path.join(slurm_working_dir, 'output')
    slurm_config.stageoutLogsDir = os.path.join(slurm_working_dir, 'logs')
    slurm_config.stageoutFiles = ["*.csv", "*.zip", "*.png"]

    slurm_config.payload = config.payload.format(script=os.path.join(out_dir, "HHMachineLearning.py"))

    if not output:
        # One job per pickled split file, times N_models when each
        # cross-validation model is trained in its own job
        for f in glob.glob(os.path.join(parameters.main_path, 'split', name, '*.pkl')):
            task = os.path.basename(f)
            if parameters.crossvalidation and parameters.split_per_model:
                for N in range(parameters.N_models):
                    slurm_config.inputParams.append([name, task, N])
            else:
                slurm_config.inputParams.append([name, task])

    # Submit job!
    logging.info("Submitting job...")
    if not debug:
        submitWorker = SubmitWorker(slurm_config, submit=True, yes=True, debug=False, quiet=False)
        submitWorker()
        logging.info("Done")
    else:
        logging.info("Number of jobs : %d" % len(slurm_config.inputParams))
        logging.info(slurm_config.payload)
        logging.info(slurm_config.inputParamsNames)
        for inputParam in slurm_config.inputParams:
            logging.info(inputParam)
        logging.info("... don't worry, jobs not sent")
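# Minimal command-line entry point (a sketch, not part of the original code):
# everything after the options is forwarded verbatim to HHMachineLearning.py,
# and the default is a dry run that only logs the payload and per-job
# parameters (see the debug branch of submit_on_slurm above).
if __name__ == '__main__':
    import argparse
    cli_parser = argparse.ArgumentParser(description='SLURM submission helper')
    cli_parser.add_argument('--name', required=True, help='scan/split name')
    cli_parser.add_argument('--submit', action='store_true',
                            help='actually submit the jobs (default: dry run)')
    cli_parser.add_argument('extra', nargs=argparse.REMAINDER,
                            help='arguments forwarded to the training script')
    cli = cli_parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    submit_on_slurm(name=cli.name,
                    args=' ' + ' '.join(cli.extra),
                    debug=not cli.submit)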