def main(input_dir,
         path_models,
         config_file,
         flag_full_workflow=False,
         particle=None,
         wait_jobid_train_pipe=None,
         wait_jobids_merge=None,
         dictionary_with_dl1_paths=None,
         source_environment=None):
    """
    Convert onsite files from dl1 to dl2.

    In interactive mode every `dl1_*` file found in `input_dir` is processed
    with `lstchain_dl1_to_dl2` through `os.system`. In full-workflow mode the
    training and testing dl1 files of `particle` are each submitted with
    `sbatch`, depending on the train and merge jobs.

    Parameters
    ----------
    input_dir : str
        path to the files directory to analyse. The output directory is the
        same path with 'DL1' replaced by 'DL2'.
    path_models : str
        path to the trained models
    config_file : str or None
        Path to a configuration file. If None, no `-c` option is passed.
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow
        that converts r0 to dl2 files.
    particle : str
        Type of particle used to create the log and dictionary.
        COMPULSORY argument when flag_full_workflow is set to True.
    wait_jobid_train_pipe : str
        jobid of the batched train stage, used as dependency of the batched
        jobs. An empty string or None means "no train dependency".
    wait_jobids_merge : str
        string with the merge_and_copy jobids, used as dependency of the
        batched jobs. An empty string or None means "no merge dependency".
    dictionary_with_dl1_paths : dict
        Dictionary with particles as keys containing the final outnames of the
        dl1 files (`[particle]['training']['train_path_and_outname_dl1']` and
        `[particle]['testing']['test_path_and_outname_dl1']`).
        COMPULSORY argument when flag_full_workflow is set to True.
    source_environment : str or None
        Text prepended verbatim to the lstchain command (e.g. the `source ...;
        conda activate ...;` prefix).
        NOTE(review): it is concatenated without any separator, so it
        presumably has to end with `; ` or similar - confirm with the caller.
        ! NOTE : train_pipe AND dl1_to_dl2 MUST BE RUN WITH THE SAME ENVIRONMENT

    Returns
    -------
    None when flag_full_workflow is False. Otherwise a tuple:

    log_dl1_to_dl2 : dict
        `{particle: {jobid: sbatch_command, ..., 'dl2_test_path': path}}`
    return_jobids : str
        comma separated jobids of the batched jobs, to be used as dependencies
        by the next stage of the workflow (dl2_to_irfs)
    """
    output_dir = input_dir.replace('DL1', 'DL2')

    if flag_full_workflow:
        check_and_make_dir_without_verification(output_dir)
        print(f"\tOutput dir {particle}: {output_dir}")

        log_dl1_to_dl2 = {particle: {}}

        # path to dl1 files by particle type
        dl1_paths = dictionary_with_dl1_paths[particle]
        file_list = [
            dl1_paths['training']['train_path_and_outname_dl1'],
            dl1_paths['testing']['test_path_and_outname_dl1'],
        ]

        return_jobids = []

        # Keep only the dependencies that are actually set; empty strings and
        # None are both treated as "nothing to wait for".
        wait_jobs = ','.join(
            jobids for jobids in (wait_jobid_train_pipe, wait_jobids_merge)
            if jobids)

        job_name = {
            'electron': 'dl1-2_e',
            'gamma': 'dl1-2_g',
            'gamma-diffuse': 'dl1-2_gd',
            'proton': 'dl1-2_p',
            'gamma_off0.0deg': 'g.0_1-2dl',
            'gamma_off0.4deg': 'g.4_1-2dl'
        }
    else:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

        check_and_make_dir(output_dir)
        print(f"Output dir: {output_dir}")

        file_list = [
            os.path.join(input_dir, f) for f in os.listdir(input_dir)
            if f.startswith('dl1_')
        ]

        query_continue(f"{len(file_list)} jobs, ok?")

    for file in file_list:
        cmd = ''
        if source_environment is not None:
            cmd += source_environment
        cmd += f'lstchain_dl1_to_dl2 -f {file} -p {path_models} -o {output_dir}'

        if config_file is not None:
            cmd += f' -c {config_file}'

        if not flag_full_workflow:
            # Run interactively
            os.system(cmd)
            continue

        # flag_full_workflow == True !
        if 'training' in file:
            ftype = 'train'
        elif 'testing' in file:
            ftype = 'test'
        else:
            ftype = '-'

        jobe = os.path.join(output_dir, f"dl1_dl2_{particle}_{ftype}job.e")
        jobo = os.path.join(output_dir, f"dl1_dl2_{particle}_{ftype}job.o")

        batch_cmd = 'sbatch --parsable -p short'
        if wait_jobs:
            # An empty `--dependency=afterok:` is rejected by sbatch.
            batch_cmd += f' --dependency=afterok:{wait_jobs}'
        batch_cmd += f' -J {job_name[particle]} -e {jobe} -o {jobo} --wrap="{cmd}"'

        jobid_dl1_to_dl2 = os.popen(batch_cmd).read().strip('\n')

        log_dl1_to_dl2[particle][jobid_dl1_to_dl2] = batch_cmd
        if 'testing' in file:
            # The dl2 file does not exist yet; its future path is stored so
            # that the next stage can be batched.
            log_dl1_to_dl2[particle]['dl2_test_path'] = file.replace(
                '/DL1/', '/DL2/').replace('dl1_', 'dl2_')
        return_jobids.append(jobid_dl1_to_dl2)

    # copy this script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(output_dir, os.path.basename(__file__)))
    if config_file is not None:
        shutil.copyfile(
            config_file,
            os.path.join(output_dir, os.path.basename(config_file)))

    if not flag_full_workflow:
        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")
    else:
        return_jobids = ','.join(return_jobids)
        return log_dl1_to_dl2, return_jobids
def main(gamma_dl1_train_file,
         proton_dl1_train_file,
         config_file=None,
         source_environment=source_env,
         flag_full_workflow=False,
         wait_ids_proton_and_gammas=None):
    """
    Train RF from dl1 data (onsite LaPalma cluster).

    Builds a `lstchain_mc_trainpipe` command and submits it with `sbatch`
    (queue `long`) in both modes; in full-workflow mode the jobid is captured
    and returned.

    Parameters
    ----------
    gamma_dl1_train_file : str
        path to the gamma file
    proton_dl1_train_file : str
        path to the proton file. The models directory is derived from its
        directory ('/mc/DL1' or '/DL1' replaced by '/models', and '/proton/'
        removed).
    config_file : str or None
        Path to a configuration file. If None, no `-c` option is passed.
    source_environment : str or None
        Text prepended verbatim to the lstchain command to activate a certain
        conda environment. None means "no prefix".
        ! NOTE : train_pipe AND dl1_to_dl2 MUST BE RUN WITH THE SAME ENVIRONMENT
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow
        that converts r0 to dl2 files.
    wait_ids_proton_and_gammas : str
        a string of chained jobids separated by ',' (without spaces) to
        indicate the dependencies of the job to be batched. An empty string or
        None means "no dependency".

    Returns
    -------
    None when flag_full_workflow is False. Otherwise a tuple:

    log_train : dict
        `{jobid: sbatch_command}` of the batched job
    jobid_train : str
        jobid of the batched job, to be used as dependency by the next stage
        of the workflow (onsite_mc_dl1_to_dl2)
    models_dir : str
        Path with the directory where the models are stored
    """
    if flag_full_workflow:
        log_train = {}
    else:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

    gamma_file = os.path.abspath(gamma_dl1_train_file)
    proton_file = os.path.abspath(proton_dl1_train_file)
    dl1_proton_dir = os.path.dirname(proton_file)

    # check if the path follows the established layout (lstchain-like) or not
    # (rta-like)
    if dl1_proton_dir.find('/mc/DL1/') > 0:
        models_dir = dl1_proton_dir.replace('/mc/DL1', '/models')
    else:
        models_dir = dl1_proton_dir.replace('/DL1', '/models')
    models_dir = models_dir.replace('/proton/', '/')

    print(f"\tModels will be placed in {models_dir}")
    if flag_full_workflow:
        check_and_make_dir_without_verification(models_dir)
    else:
        check_and_make_dir(models_dir)

    base_cmd = ''
    if source_environment is not None:
        base_cmd += source_environment
    base_cmd += f'lstchain_mc_trainpipe --fg {gamma_file}' \
                f' --fp {proton_file} -o {models_dir}'
    if config_file is not None:
        base_cmd += f' -c {config_file}'

    jobo = os.path.join(models_dir, "train_job.o")
    jobe = os.path.join(models_dir, "train_job.e")

    if not flag_full_workflow:
        cmd = f'sbatch -p long -e {jobe} -o {jobo} --wrap="{base_cmd}" '
        print(cmd)
        os.system(cmd)
    else:
        # flag_full_workflow == True !
        cmd = 'sbatch --parsable -p long'
        # Truthiness covers both '' and the default None, which cannot be
        # concatenated to a string.
        if wait_ids_proton_and_gammas:
            cmd += ' --dependency=afterok:' + wait_ids_proton_and_gammas
        cmd += f' -J train_pipe -e {jobe} -o {jobo} --wrap="{base_cmd}" '

        jobid_train = os.popen(cmd).read().strip('\n')
        log_train[jobid_train] = cmd

    # copy this script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(models_dir, os.path.basename(__file__)))
    if config_file is not None:
        shutil.copyfile(
            config_file,
            os.path.join(models_dir, os.path.basename(config_file)))

    if not flag_full_workflow:
        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")
    else:
        return log_train, jobid_train, models_dir
def main(dl2_directory,
         config_file,
         irf_point_like=True,
         irf_gamma_offset='0.0deg',
         source_env=None,
         flag_full_workflow=False,
         log_from_dl1_dl2={},
         wait_jobs_dl1dl2=None,
         prod_id=None):
    """
    Batches/runs interactively the lstchain `lstchain_create_irf_files` entry
    point. Last stage of the MC prod workflow.

    Parameters
    ----------
    dl2_directory : str
        General path to DL2 directory, not formatted with the particle.
    config_file : str or None
        Path to a configuration file. If None or empty, no `--config` option
        is passed and no config file is copied.
    irf_point_like : bool
        MC prod configuration argument to create IRFs:
        {True: gamma, False: gamma-diffuse}.
    irf_gamma_offset : str
        MC prod configuration argument to create IRFs: '0.0deg' (for ON/OFF
        obs) or '0.4deg' (for wobble obs). Any other value aborts with
        exit code -1.
    source_env : str or None
        Text placed in front of the lstchain command inside `--wrap` to
        activate a certain conda environment. None means "no prefix".
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow
        that converts r0 to dl2 files.
    log_from_dl1_dl2 : dict
        Dictionary with dl2 output path (`[particle]['dl2_test_path']`). Files
        are not yet here, but path and full name are needed to batch the job.
        Only read, never modified. When empty, the dl2 files are looked up on
        disk with `check_dl2_files`.
    wait_jobs_dl1dl2 : str
        Comma separated string with the job ids of previous stages (dl1_to_dl2
        stage) to be passed as dependencies to the create_irfs_files job to be
        batched. Empty or None means "no dependency".
    prod_id : str
        prod_id defined within config_MC_prod.yml file

    Returns
    -------
    None when flag_full_workflow is False. Otherwise a tuple:

    log_dl2_to_irfs : dict
        Dictionary-wise log containing {'job_id': 'batched_cmd'}
    list_job_id_dl2_irfs : list
        Job-ids of the batched job to be passed to the last (MC prod check)
        stage of the workflow.

    Raises
    ------
    SystemExit
        with code -1 if `irf_gamma_offset` is not an allowed value.
    """
    allowed_gamma_off = ['0.0deg', '0.4deg']
    if irf_gamma_offset not in allowed_gamma_off:
        print(
            f'Please select a valid gamma_offset to compute the IRFS: {" or ".join(allowed_gamma_off)}'
        )
        # Same effect as the interactive-only `exit(-1)` helper, but does not
        # depend on the `site` module being loaded.
        raise SystemExit(-1)

    output_irfs_dir = dl2_directory.replace('/DL2/', '/IRF/').replace('/{}/', '/')

    log_dl2_to_irfs = {}
    list_job_id_dl2_irfs = []

    if not flag_full_workflow or not log_from_dl1_dl2:
        dl2_particle_paths = check_dl2_files(dl2_directory, irf_point_like,
                                             irf_gamma_offset)

        # Find the gamma or gamma-diffuse key
        gamma_kind = [
            g for g in dl2_particle_paths.keys() if g.startswith('gamma')
        ][0]

        gamma_file = dl2_particle_paths[gamma_kind]
        proton_file = dl2_particle_paths['proton']
        electron_file = dl2_particle_paths['electron']

        name_tag_slurm = gamma_kind
    else:
        proton_file = log_from_dl1_dl2['proton']['dl2_test_path']
        electron_file = log_from_dl1_dl2['electron']['dl2_test_path']

        if irf_point_like:
            # irf_gamma_offset was validated above, so the key is either
            # 'gamma_off0.0deg' or 'gamma_off0.4deg'.
            gamma_file = log_from_dl1_dl2[f'gamma_off{irf_gamma_offset}']['dl2_test_path']
            name_tag_slurm = irf_gamma_offset
        else:
            gamma_file = log_from_dl1_dl2['gamma-diffuse']['dl2_test_path']
            name_tag_slurm = 'diffuse'

    point_like = '--point-like' if irf_point_like else ''

    # Final outfile name with IRF kind
    if prod_id is None:
        output_filename_irf = os.path.join(output_irfs_dir, 'irf.fits.gz')
    elif irf_point_like:
        output_filename_irf = os.path.join(
            output_irfs_dir, 'irf_' + prod_id.replace('.', '') +
            f'gamma_point-like_off{irf_gamma_offset.replace(".", "")}.fits.gz'
        )
    else:
        output_filename_irf = os.path.join(
            output_irfs_dir,
            'irf_' + prod_id.replace('.', '') + 'gamma_diffuse.fits.gz')

    cmd = f'lstchain_create_irf_files {point_like} -g {gamma_file} -p {proton_file} -e {electron_file}' \
          f' -o {output_filename_irf}'
    if config_file:
        cmd += f' --config={config_file}'

    if not flag_full_workflow:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

        check_and_make_dir(output_irfs_dir)
        os.system(cmd)

        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")
    else:
        # flag_full_workflow == True !
        print(f'\tOutput dir IRFs: {output_irfs_dir}')
        check_and_make_dir_without_verification(output_irfs_dir)

        jobe = os.path.join(output_irfs_dir, "job_dl2_to_irfs.e")
        jobo = os.path.join(output_irfs_dir, "job_dl2_to_irfs.o")

        batch_cmd = 'sbatch --parsable -p short'
        if wait_jobs_dl1dl2:
            # Without this guard a missing dependency became `afterok:None`.
            batch_cmd += f' --dependency=afterok:{wait_jobs_dl1dl2}'
        # Without this guard a missing environment became the literal "None".
        wrapped_cmd = f'{source_env} {cmd}' if source_env else cmd
        batch_cmd += f' -J MC_IRF_{name_tag_slurm}' \
                     f' -e {jobe} -o {jobo} --wrap="{wrapped_cmd}"'

        job_id_dl2_irfs = os.popen(batch_cmd).read().strip('\n')

        log_dl2_to_irfs[job_id_dl2_irfs] = batch_cmd
        list_job_id_dl2_irfs.append(job_id_dl2_irfs)

    # Copy Script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(output_irfs_dir, os.path.basename(__file__)))
    # The previous test (`is not None or is not ''`) was always true and
    # crashed on `config_file=None`.
    if config_file:
        shutil.copyfile(
            config_file,
            os.path.join(output_irfs_dir, os.path.basename(config_file)))

    if flag_full_workflow:
        return log_dl2_to_irfs, list_job_id_dl2_irfs
def main(input_dir,
         flag_full_workflow=False,
         particle2jobs_dict={},
         particle=None,
         flag_merge=False,
         flag_no_image=True,
         prod_id=None,
         gamma_offset=None,
         source_environment=None):
    """
    Merge and copy DL1 data after production.

        1. check job_logs
        2. check that all files have been created in DL1 based on training
           and testing lists
        3. merge DL1 files
        4. move DL1 files in final place (and copy the config files there)
        5. move running_dir as logs

    In interactive mode all steps run in this process. In full-workflow mode
    step 3 is batched once per set type, and steps 4-5 are batched as three
    chained jobs that wait for both merges.

    Parameters
    ----------
    input_dir : str
        path to the DL1 files directory to merge, copy and move.
        Compulsory argument.
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow
        that converts r0 to dl2 files.
    particle2jobs_dict : dict
        Dictionary used to retrieve the r0 to dl1 jobids that were sent in the
        previous step of the r0-dl3 workflow; `particle2jobs_dict[particle]`
        is used as the dependency string of the merge jobs (an empty string
        means no dependency). Only read, never modified.
        COMPULSORY argument when flag_full_workflow is set to True.
    particle : str
        Type of particle used to create the log and dictionary.
        COMPULSORY argument when flag_full_workflow is set to True.
    flag_merge : bool
        Value passed to the `--smart` argument of `lstchain_merge_hdf5_files`.
        Default is False.
    flag_no_image : bool
        Value passed to the `--no-image` argument of
        `lstchain_merge_hdf5_files`. Default is True.
    prod_id : str
        prod_id for output filename.
    gamma_offset : str
        if gamma files have various off0.Xdeg observations, include the offset
        within the filename for completeness.
    source_environment : str or None
        Text placed in front of the merge command inside `--wrap` to activate
        a certain conda environment. None means "no prefix".
        ! NOTE : train_pipe AND dl1_to_dl2 **MUST** be run with the same
        environment.

    Returns
    -------
    None when flag_full_workflow is False. Otherwise a tuple:

    log_merge : dict
        dictionary of dictionaries with the log information of this script,
        separated by particle and set type:
        `log_merge[particle][set_type]` holds
        'train_path_and_outname_dl1' or 'test_path_and_outname_dl1' plus one
        `{jobid: sbatch_command}` entry per batched job.
    return_jobids4train : str
        jobid of the job that moves the merged dl1 files, to be used as
        dependency by the next stage of the workflow (train_pipe)
    return_jobids_debug : str
        comma separated jobids to store in log_reduced.txt - mainly debug
        purposes.
    """
    set_types = ['testing', 'training']

    if flag_full_workflow:
        log_merge = {particle: {'training': {}, 'testing': {}}}

        wait_r0_dl1_jobs = particle2jobs_dict[particle]

        return_jobids4train = []
        return_jobids_debug = []

        job_name = {
            'electron': 'e_merge',
            'gamma': 'g_merge',
            'gamma-diffuse': 'gd_merge',
            'proton': 'p_merge',
            'gamma_off0.0deg': 'g0.0_merge',
            'gamma_off0.4deg': 'g0.4_merge'
        }
    else:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

    JOB_LOGS = os.path.join(input_dir, 'job_logs')
    training_filelist = os.path.join(input_dir, 'training.list')
    testing_filelist = os.path.join(input_dir, 'testing.list')
    running_DL1_dir = os.path.join(input_dir, 'DL1')
    DL1_training_dir = os.path.join(running_DL1_dir, 'training')
    DL1_testing_dir = os.path.join(running_DL1_dir, 'testing')
    final_DL1_dir = input_dir.replace('running_analysis', 'DL1')
    logs_destination_dir = input_dir.replace('running_analysis',
                                             'analysis_logs')

    # 1. check job logs
    check_job_logs(JOB_LOGS)

    # 2. check that all files have been created in DL1 based on training and
    #    testing lists; just check number of files first
    if len(os.listdir(DL1_training_dir)) != len(
            read_lines_file(training_filelist)):
        tf = check_files_in_dir_from_file(DL1_training_dir, training_filelist)
        if tf != [] and not flag_full_workflow:
            query_continue(
                "{} files from the training list are not in the `DL1/training` directory:\n{} "
                "Continue ?".format(len(tf), tf))

    if len(os.listdir(DL1_testing_dir)) != len(
            read_lines_file(testing_filelist)):
        tf = check_files_in_dir_from_file(DL1_testing_dir, testing_filelist)
        if tf != [] and not flag_full_workflow:
            query_continue(
                "{} files from the testing list are not in the `DL1/testing directory:\n{} "
                "Continue ?".format(len(tf), tf))

    if not flag_full_workflow:
        print("\tmerging starts")

        # 3. merge DL1 files
        for set_type in set_types:
            tdir = os.path.join(running_DL1_dir, set_type)

            # dl1 files must already exist, otherwise there is nothing to
            # merge; the first one provides the base name.
            output_filename = os.listdir(tdir)[0]
            output_filename = 'dl1_' + os.path.basename(
                output_filename.split('_run')[0])
            if particle == 'gamma-diffuse':
                output_filename = output_filename.replace(
                    'gamma', 'gamma-diffuse')
            if gamma_offset is not None:
                output_filename += f'_{gamma_offset}'
            if prod_id is not None:
                output_filename += f'_{prod_id}'
            output_filename += f'_{set_type}.h5'

            output_filename = os.path.join(running_DL1_dir, output_filename)
            print(f"\t\tmerge output: {output_filename}")

            # 3.1 run the merge interactively
            cmd = f"lstchain_merge_hdf5_files -d {tdir} -o {output_filename} --no-image {flag_no_image} " \
                  f"--smart {flag_merge}"
            os.system(cmd)

        # 4. move DL1 files in final place
        check_and_make_dir(final_DL1_dir)
        move_dir_content(running_DL1_dir, final_DL1_dir)
        print(f"\tDL1 files have been moved to {final_DL1_dir}")

        # copy lstchain config file there too. HiPeRTA configs are *.txt
        config_files = [
            os.path.join(input_dir, f) for f in os.listdir(input_dir)
            if f.endswith(('.json', '.txt'))
        ]
        for file in config_files:
            shutil.copyfile(
                file, os.path.join(final_DL1_dir, os.path.basename(file)))

        # 5. move running_dir as logs
        check_and_make_dir(logs_destination_dir)
        move_dir_content(input_dir, logs_destination_dir)
        print(f"\tLOGS have been moved to {logs_destination_dir}")

        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")
        return None

    # flag_full_workflow == True !
    print(f"\n\tmerging starts - {particle}")

    slurm_name = job_name[particle]
    slurm_prefix = slurm_name.split('_')[0]
    # A missing environment used to be rendered as the literal text "None".
    env_prefix = f'{source_environment} ' if source_environment else ''

    # Only the base name is needed, so the first entry of the training list is
    # used for both set types.
    with open(training_filelist, 'r') as f:
        first_listed_file = f.readline()
    base_outname = 'dl1_' + os.path.basename(
        first_listed_file.split('_run')[0])
    if particle == 'gamma-diffuse':
        base_outname = base_outname.replace('gamma', 'gamma-diffuse')
    if '_off' in particle:
        base_outname += f'_{gamma_offset}'

    # 3. merge DL1 files
    wait_both_merges = []

    for set_type in set_types:
        tdir = os.path.join(running_DL1_dir, set_type)

        output_filename = os.path.join(
            running_DL1_dir, f'{base_outname}_{prod_id}_{set_type}.h5')
        print(f"\t\tmerge output: {output_filename}")

        # After the workflow the files will be moved, they will not stay at
        # output_filename
        path_key = ('train_path_and_outname_dl1'
                    if set_type == 'training' else 'test_path_and_outname_dl1')
        log_merge[particle][set_type][path_key] = os.path.join(
            final_DL1_dir, os.path.basename(output_filename))

        cmd = 'sbatch --parsable -p short'
        if wait_r0_dl1_jobs != '':
            cmd += ' --dependency=afterok:' + wait_r0_dl1_jobs

        # stderr goes to `*.e` and stdout to `*.o`, like every other job of
        # the workflow (the two extensions used to be swapped here).
        cmd += f' -J {slurm_name} -e slurm-{slurm_name}-{set_type}.e ' \
               f'-o slurm-{slurm_name}-{set_type}.o --wrap="{env_prefix}' \
               f'lstchain_merge_hdf5_files -d {tdir} -o {output_filename} --no-image {flag_no_image} ' \
               f'--smart {flag_merge}"'

        jobid_merge = os.popen(cmd).read().strip('\n')
        log_merge[particle][set_type][jobid_merge] = cmd

        print(
            f'\t\tSubmitted batch job {jobid_merge} -- {particle}, {set_type}'
        )

        wait_both_merges.append(jobid_merge)
        return_jobids_debug.append(jobid_merge)

    # Out of testing/training loop !
    # The follow-up jobs are logged under the last set type iterated
    # ('training'), as the code did implicitly through the loop variable.
    followup_log = log_merge[particle][set_types[-1]]

    # 4., 5. & 6. in the case of the full workflow are done in separate sbatch
    # jobs that wait for the merges:
    #   4 --> move DL1 files in final place
    #   5 --> copy lstchain config file in final_dir too
    #   6 --> move running_dir as logs
    print(f"\tDL1 files will be moved to {final_DL1_dir}")

    base_cmd = 'sbatch --parsable -p short -J {} -e {} -o {} --dependency=afterok:{} ' \
               '--wrap="python batch_dl1_utils-merge_and_copy.py -s {} -d {} --copy_conf {}"'

    wait_both_merges = ','.join(wait_both_merges)

    # 4 --> move DL1 files in final place
    batch_mv_dl1 = base_cmd.format(
        slurm_prefix + '_mv_dl1',
        f'slurm-{slurm_prefix}_mv_DL1_files.e',
        f'slurm-{slurm_prefix}_mv_DL1_files.o',
        wait_both_merges, running_DL1_dir, final_DL1_dir, 'False')

    jobid_move_dl1 = os.popen(batch_mv_dl1).read().strip('\n')
    followup_log[jobid_move_dl1] = batch_mv_dl1

    print(
        f'\t\tSubmitted batch job {jobid_move_dl1}. It will move dl1 files when {wait_both_merges} finish.'
    )

    # 5 --> copy lstchain config file in final_dir too
    batch_copy_conf = base_cmd.format(
        slurm_prefix + '_cp_conf',
        f'slurm-{slurm_prefix}_cp_config.e',
        f'slurm-{slurm_prefix}_cp_config.o',
        jobid_move_dl1, input_dir, final_DL1_dir, 'True')

    jobid_copy_conf = os.popen(batch_copy_conf).read().strip('\n')
    followup_log[jobid_copy_conf] = batch_copy_conf

    print(
        f'\t\tSubmitted batch job {jobid_copy_conf}. It will copy the used config when {jobid_move_dl1} finish.'
    )

    # 6 --> move running_dir to final analysis_logs
    batch_mv_dir = base_cmd.format(
        slurm_prefix + '_mv_dir',
        f'slurm-{slurm_prefix}_mv_DL1_direct.e',
        f'slurm-{slurm_prefix}_mv_DL1_direct.o',
        jobid_copy_conf, input_dir, logs_destination_dir, 'False')

    jobid_move_log = os.popen(batch_mv_dir).read().strip('\n')
    followup_log[jobid_move_log] = batch_mv_dir

    print(
        f'\t\tSubmitted batch job {jobid_move_log}. It will move running_dir when {jobid_copy_conf} finish.'
    )

    # The next stage must wait for the job that MOVES the merged dl1 files
    # (not for the merge itself): until the files are in their final place
    # the workflow cannot continue.
    return_jobids4train.append(jobid_move_dl1)

    return_jobids_debug.append(jobid_move_dl1)
    return_jobids_debug.append(jobid_move_log)
    return_jobids_debug.append(jobid_copy_conf)

    print(f"\tLOGS will be moved to {logs_destination_dir}")

    return_jobids4train = ','.join(return_jobids4train)
    return_jobids_debug = ','.join(return_jobids_debug)

    return log_merge, return_jobids4train, return_jobids_debug
def main(input_dir,
         config_file=None,
         train_test_ratio=0.5,
         random_seed=42,
         n_r0_files_per_dl1_job=0,
         flag_full_workflow=False,
         particle=None,
         prod_id=None,
         source_environment=None,
         offset=None):
    """
    R0 to DL1 MC onsite conversion.

    The input files are shuffled, split into a training and a testing list,
    cut into sub-lists and one `sbatch` job is submitted per sub-list file
    found in the corresponding `file_lists_<set_type>` directory.

    Parameters
    ----------
    input_dir : str
        path to the files directory to analyse
    config_file : str or None
        Path to a configuration file. If None, no `-c` option is passed.
    train_test_ratio : float
        Ratio of training data. Default = 0.5
    random_seed : int
        Random seed for random processes. Default = 42
    n_r0_files_per_dl1_job : int
        Number of r0 files processed by each r0_to_dl1 batched job.
        If fewer than 100 r0 files are found in `input_dir`, this is
        considered a test on a small production and 10 files per job are
        used regardless of this argument. Otherwise, if set to 0 (default),
        the value is chosen from the particle name found in `input_dir`:
        50 for gamma-diffuse and electron, 25 for gamma, 125 for proton and
        50 for anything else.
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow
        that converts r0 to dl2 files.
    particle : str
        particle type for `flag_full_workflow` = True
    prod_id : str
        Production ID. In interactive mode it is appended to
        `<date>_v<lstchain version>` (`_v00` if None). In full-workflow mode
        the full prod_id is passed and used as is.
    source_environment : str or None
        Environment activation text; when not None it is handed to
        `manage_source_env_r0_dl1` together with the path of `./core_list.sh`.
        ! NOTE : train_pipe AND dl1_to_dl2 **MUST** be run with the same
        environment.
    offset : str
        gamma offset; used in full-workflow mode when 'off' is in `particle`.

    Returns
    -------
    None when flag_full_workflow is False. Otherwise a tuple:

    jobid2log : dict
        A dictionary of dictionaries, one entry per batched jobid, with
        dict[jobid].keys() = ['particle', 'set_type', 'jobe_path',
        'jobo_path', 'sbatch_command']
    jobids_r0_dl1 : list
        A list of all the jobs sent by particle (including test and train set
        types).
    """
    if not flag_full_workflow:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")
        # This formatting should be the same as in `onsite_mc_r0_to_dl3.py`
        today = calendar.datetime.date.today()
        base_prod_id = f'{today.year:04d}{today.month:02d}{today.day:02d}_v{lstchain.__version__}'
        suffix_id = '_v00' if prod_id is None else f'_{prod_id}'
        PROD_ID = base_prod_id + suffix_id
    else:
        # Full prod_id is passed as argument
        PROD_ID = prod_id

    TRAIN_TEST_RATIO = float(train_test_ratio)
    RANDOM_SEED = random_seed
    DL0_DATA_DIR = input_dir

    if source_environment is not None:
        manage_source_env_r0_dl1(source_and_env=source_environment,
                                 file=os.path.abspath('./core_list.sh'))

    ##########################################################################

    print(f"Working on DL0 files in {DL0_DATA_DIR}")

    check_data_path(DL0_DATA_DIR)

    raw_files_list = get_input_filelist(DL0_DATA_DIR)

    if len(raw_files_list) < 100:
        N_R0_PER_DL1_JOB = 10
    elif n_r0_files_per_dl1_job == 0:
        # 'gamma-diffuse' must be tested before 'gamma': the latter is a
        # substring of the former and would otherwise shadow it.
        if 'gamma-diffuse' in input_dir or 'electron' in input_dir:
            N_R0_PER_DL1_JOB = 50
        elif 'gamma' in input_dir:
            N_R0_PER_DL1_JOB = 25
        elif 'proton' in input_dir:
            N_R0_PER_DL1_JOB = 125
        else:
            N_R0_PER_DL1_JOB = 50
    else:
        N_R0_PER_DL1_JOB = n_r0_files_per_dl1_job

    random.seed(RANDOM_SEED)
    random.shuffle(raw_files_list)

    number_files = len(raw_files_list)
    ntrain = int(number_files * TRAIN_TEST_RATIO)
    ntest = number_files - ntrain

    training_list = raw_files_list[:ntrain]
    testing_list = raw_files_list[ntrain:]

    print(f"\t{number_files} raw files")
    print(f"\t{ntrain} files in training dataset")
    print(f"\t{ntest} files in test dataset")

    # The lists are first written in the current directory and moved to
    # RUNNING_DIR at the end.
    with open('training.list', 'w+') as newfile:
        for f in training_list:
            newfile.write(f)
            newfile.write('\n')

    with open('testing.list', 'w+') as newfile:
        for f in testing_list:
            newfile.write(f)
            newfile.write('\n')

    if flag_full_workflow and 'off' in particle:
        # join(BASE_PATH, 'DL0', OBS_DATE, '{particle}', ZENITH, POINTING,
        #      'PLACE_4_PROD_ID', GAMMA_OFF)
        DL0_DATA_DIR = DL0_DATA_DIR.split(offset)[0]  # Take out /off0.Xdeg
        RUNNING_DIR = os.path.join(
            DL0_DATA_DIR.replace('DL0', 'running_analysis'), PROD_ID, offset)
    else:
        RUNNING_DIR = os.path.join(
            DL0_DATA_DIR.replace('DL0', 'running_analysis'), PROD_ID)

    JOB_LOGS = os.path.join(RUNNING_DIR, 'job_logs')
    DL1_DATA_DIR = os.path.join(RUNNING_DIR, 'DL1')

    print("\tRUNNING_DIR: \t", RUNNING_DIR)
    print("\tJOB_LOGS DIR: \t", JOB_LOGS)
    print("\tDL1 DATA DIR: \t", DL1_DATA_DIR)

    # In workflow mode directories are created without asking the user.
    if flag_full_workflow:
        make_dir = check_and_make_dir_without_verification
    else:
        make_dir = check_and_make_dir

    for directory in [RUNNING_DIR, DL1_DATA_DIR, JOB_LOGS]:
        make_dir(directory)

    # Slurm job names by particle (workflow mode only)
    job_name = {
        'electron': 'r0dl1_e',
        'gamma': 'r0dl1_g',
        'gamma-diffuse': 'r0dl1_gd',
        'proton': 'r0dl1_p',
        'gamma_off0.0deg': 'g0.0_r0dl1',
        'gamma_off0.4deg': 'g0.4_r0dl1'
    }

    # dumping the training and testing lists and splitting them in sub-lists
    # for parallel jobs
    jobid2log = {}
    jobids_r0_dl1 = []

    for set_type in 'training', 'testing':
        if set_type == 'training':
            list_type = training_list
            job_tag = 'train'
        else:
            list_type = testing_list
            job_tag = 'test'

        dir_lists = os.path.join(RUNNING_DIR, 'file_lists_' + set_type)
        output_dir = os.path.join(RUNNING_DIR, 'DL1', set_type)

        make_dir(dir_lists)
        make_dir(output_dir)

        print("\toutput dir: \t", output_dir)

        # Ceiling division: one extra sub-list for the remainder
        number_of_sublists = len(list_type) // N_R0_PER_DL1_JOB + int(
            len(list_type) % N_R0_PER_DL1_JOB > 0)
        for i in range(number_of_sublists):
            output_file = os.path.join(dir_lists, f'{set_type}_{i}.list')
            with open(output_file, 'w+') as out:
                for line in list_type[i * N_R0_PER_DL1_JOB:
                                      N_R0_PER_DL1_JOB * (i + 1)]:
                    out.write(line)
                    out.write('\n')
        print(f'\t{number_of_sublists} files generated for {set_type} list')

        # LSTCHAIN #
        counter = 0
        save_job_ids = []

        cc = f' -c {config_file}' if config_file is not None else ' '
        base_cmd = f'core_list.sh "lstchain_mc_r0_to_dl1 -o {output_dir} {cc}"'

        for file in os.listdir(dir_lists):
            jobo = os.path.join(JOB_LOGS, f"job{counter}_{job_tag}.o")
            jobe = os.path.join(JOB_LOGS, f"job{counter}_{job_tag}.e")
            list_path = os.path.join(dir_lists, file)

            # recover or not the jobid depending of the workflow mode
            if not flag_full_workflow:
                cmd = f'sbatch -p short -e {jobe} -o {jobo} {base_cmd} {list_path}'
                os.system(cmd)
            else:
                # flag_full_workflow == True !
                # TODO change non-proton particles to `short` after prod5 check
                queue = 'long'

                cmd = f'sbatch --parsable -p {queue} -J {job_name[particle]} ' \
                      f'-e {jobe} -o {jobo} {base_cmd} {list_path}'

                jobid = os.popen(cmd).read().strip('\n')
                jobids_r0_dl1.append(jobid)

                # Fill the dictionaries if IN workflow mode
                jobid2log[jobid] = {
                    'particle': particle,
                    'set_type': set_type,
                    'jobe_path': jobe,
                    'jobo_path': jobo,
                    'sbatch_command': cmd,
                }

                save_job_ids.append(jobid)

            counter += 1

        if flag_full_workflow:
            if save_job_ids:
                print(f"\n\t{counter} jobs submitted - {particle} {set_type}. "
                      f"From jobid {save_job_ids[0]} - {save_job_ids[-1]}\n")
            else:
                # No list file found: nothing was submitted, so there is no
                # jobid range to report.
                print(f"\n\t{counter} jobs submitted - {particle} {set_type}.\n")
            time.sleep(1)  # Avoid collapsing LP cluster

    # copy this script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(RUNNING_DIR, os.path.basename(__file__)))
    if config_file is not None:
        shutil.copyfile(
            config_file,
            os.path.join(RUNNING_DIR, os.path.basename(config_file)))

    # save file lists into logs
    shutil.move('testing.list', os.path.join(RUNNING_DIR, 'testing.list'))
    shutil.move('training.list', os.path.join(RUNNING_DIR, 'training.list'))

    # create log dictionary and return it if IN workflow mode
    if flag_full_workflow:
        return jobid2log, jobids_r0_dl1
    else:
        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")
def main(input_dir, config_rta_file=None, train_test_ratio=0.5, random_seed=42, n_r0_files_per_dl1_job=0,
         flag_full_workflow=False, particle=None, prod_id=None, offset=None, keep_rta_file=False, lst_config=None):
    """
    Split the R0 files found in `input_dir` into a training and a testing set and
    batch the HiPeRTA r0_to_dl1 jobs (one `sbatch` submission per sub-list of files).

    Same as for r0_to_dl1 lst-like but with the exceptions of rta.

    Parameters
    ----------
    input_dir : str
        path to the files directory to analyse
    config_rta_file : str
        Path to a HiPeRTA configuration file. If given, it is passed with `-c` to the
        batched script and copied into the running directory.
    train_test_ratio : float
        Ratio of training data. Default = 0.5
    random_seed : int
        Random seed used to shuffle the file list before splitting it. Default = 42
    n_r0_files_per_dl1_job : int
        Number of r0 files processed by each r0_to_dl1 batched stage.
        If set to 0 (Default), the number is chosen from the particle name found in
        `input_dir` (gamma-diffuse / electron: 50, gamma: 25, proton: 125, otherwise 50).
        Whatever the value, productions with fewer than 100 files always use 10 files per job.
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow that
        converts r0 to dl2 files.
    particle : str
        particle type for `flag_full_workflow` = True. It must be one of the keys of the
        job-name table used below (`electron`, `gamma`, `gamma-diffuse`, `proton`,
        `gamma_off0.0deg`, `gamma_off0.4deg`).
    prod_id : str
        Production ID. If `flag_full_workflow` is False, it is appended to the
        `<date>_vRTA` base id (`_v00` is used when None). If `flag_full_workflow` is True,
        it is taken as the full production id.
    offset : str
        gamma offset; used to build the running directory when `particle` contains 'off'.
    keep_rta_file : bool
        Argument to be passed (with `-k`) to the hiperta_r0_to_dl1lstchain script, which runs
        the hiperta_r0_dl1 and re-organiser stage
    lst_config : str
        path used just to copy the config to `running analysis`

    Returns
    -------
    jobid2log : dict
        dictionary with the batched jobids as keys and, as values, a dictionary with the
        particle, set type, job log paths and sbatch command of each job.
    jobids_RTA_r0_dl1_reorganized : list
        list with all the batched jobids.

        ****  otherwise : (if flag_full_workflow is False, by default) ****
        None is returned
    """
    if not flag_full_workflow:
        # This formatting should be the same as in `onsite_mc_r0_to_dl3_hiperta.py`
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

        today = calendar.datetime.date.today()
        base_prod_id = f'{today.year:04d}{today.month:02d}{today.day:02d}_vRTA'
        suffix_id = '_v00' if prod_id is None else f'_{prod_id}'
        full_prod_id = base_prod_id + suffix_id
    else:
        # Full prod_id is passed as argument
        full_prod_id = prod_id

    ratio = float(train_test_ratio)
    dl0_data_dir = input_dir

    print(f"Working on MCHDF5 R0 files in {dl0_data_dir}")

    check_data_path(dl0_data_dir)
    raw_files_list = get_input_filelist(dl0_data_dir)

    if len(raw_files_list) < 100:
        # Small productions always use short sub-lists
        n_r0_per_dl1_job = 10
    elif n_r0_files_per_dl1_job == 0:
        # 'gamma-diffuse' MUST be tested before 'gamma': the latter is a substring of the
        # former, so testing 'gamma' first would never let gamma-diffuse reach its own value.
        if 'gamma-diffuse' in input_dir or 'electron' in input_dir:
            n_r0_per_dl1_job = 50
        elif 'gamma' in input_dir:
            n_r0_per_dl1_job = 25
        elif 'proton' in input_dir:
            n_r0_per_dl1_job = 125
        else:
            n_r0_per_dl1_job = 50
    else:
        n_r0_per_dl1_job = n_r0_files_per_dl1_job

    # Reproducible train / test splitting
    random.seed(random_seed)
    random.shuffle(raw_files_list)

    number_files = len(raw_files_list)
    ntrain = int(number_files * ratio)
    ntest = number_files - ntrain

    training_list = raw_files_list[:ntrain]
    testing_list = raw_files_list[ntrain:]

    print(f"\t{number_files} raw files")
    print(f"\t{ntrain} files in training dataset")
    print(f"\t{ntest} files in test dataset")

    # The full lists are first written in the current directory; they are moved into the
    # running directory at the end of the function.
    with open('training.list', 'w+') as newfile:
        newfile.writelines(f + '\n' for f in training_list)

    with open('testing.list', 'w+') as newfile:
        newfile.writelines(f + '\n' for f in testing_list)

    if flag_full_workflow and 'off' in particle:
        # join(BASE_PATH, 'DL0', OBS_DATE, '{particle}', ZENITH, POINTING, 'PLACE_4_PROD_ID', GAMMA_OFF)
        dl0_data_dir = dl0_data_dir.split(offset)[0]  # Take out /off0.Xdeg
        running_dir = os.path.join(dl0_data_dir.replace('R0', 'running_analysis'), full_prod_id, offset)
    else:
        running_dir = os.path.join(dl0_data_dir.replace('R0', 'running_analysis'), full_prod_id)

    job_logs = os.path.join(running_dir, 'job_logs')
    dl1_data_dir = os.path.join(running_dir, 'DL1')

    print("\tRUNNING_DIR: \t", running_dir)
    print("\tJOB_LOGS DIR: \t", job_logs)
    print("\tDL1 DATA DIR: \t", dl1_data_dir)

    # In workflow mode the directories are created without asking the user
    make_dir = check_and_make_dir_without_verification if flag_full_workflow else check_and_make_dir

    for directory in [running_dir, dl1_data_dir, job_logs]:
        make_dir(directory)

    # Slurm job names by particle type (only used in workflow mode)
    job_name = {
        'electron': 'e_RTA-r0dl1',
        'gamma': 'g_RTA-r0dl1',
        'gamma-diffuse': 'gd_RTA-r0dl1',
        'proton': 'p_RTA-r0dl1',
        'gamma_off0.0deg': 'g0.0_RTA-r0dl1',
        'gamma_off0.4deg': 'g0.4_RTA-r0dl1',
    }
    queue = 'long'  # TODO change to short (except for protons) after prod5 check

    # TODO for the moment is only user enrique.garcia who has installed HiPeRTA
    cc = ' -c {}'.format(config_rta_file) if config_rta_file is not None else ' '

    # dumping the training and testing lists and splitting them in sub-lists for parallel jobs
    jobid2log = {}
    jobids_RTA_r0_dl1_reorganized = []

    for set_type in 'training', 'testing':
        if set_type == 'training':
            files_in_set = training_list
            log_suffix = 'train'
        else:
            files_in_set = testing_list
            log_suffix = 'test'

        dir_lists = os.path.join(running_dir, 'file_lists_' + set_type)
        output_dir = os.path.join(running_dir, 'DL1', set_type)

        make_dir(dir_lists)
        make_dir(output_dir)

        print("\toutput dir: \t", output_dir)

        # Number of sub-lists: integer division rounded up
        number_of_sublists = len(files_in_set) // n_r0_per_dl1_job + int(len(files_in_set) % n_r0_per_dl1_job > 0)
        for i in range(number_of_sublists):
            output_file = os.path.join(dir_lists, f'{set_type}_{i}.list')
            sublist = files_in_set[i * n_r0_per_dl1_job:n_r0_per_dl1_job * (i + 1)]
            with open(output_file, 'w+') as out:
                out.writelines(line + '\n' for line in sublist)
        print(f'\t{number_of_sublists} files generated for {set_type} list')

        # HiPeRTA ###
        base_cmd = f'core_list_hiperta.sh "/home/enrique.garcia/software/LST_scripts/lst_scripts/' \
                   f'hiperta_r0_to_dl1lstchain.py -o {output_dir} -k {keep_rta_file} {cc}"'

        save_job_ids = []
        # One job per file found in `dir_lists`
        list_files = os.listdir(dir_lists)

        for counter, file in enumerate(list_files):
            jobo = os.path.join(job_logs, f"job{counter}_{log_suffix}.o")
            jobe = os.path.join(job_logs, f"job{counter}_{log_suffix}.e")
            file_list_path = os.path.join(dir_lists, file)

            # recover or not the jobid depending of the workflow mode
            if not flag_full_workflow:
                cmd = f'sbatch -p short -e {jobe} -o {jobo} {base_cmd} {file_list_path}'
                os.system(cmd)
            else:
                # `--parsable` makes sbatch print the jobid, which is read back from stdout
                cmd = f'sbatch --parsable -p {queue} -J {job_name[particle]} ' \
                      f'-e {jobe} -o {jobo} {base_cmd} {file_list_path}'

                jobid = os.popen(cmd).read().strip('\n')
                jobids_RTA_r0_dl1_reorganized.append(jobid)

                # Fill the dictionaries if IN workflow mode
                jobid2log[jobid] = {
                    'particle': particle,
                    'set_type': set_type,
                    'jobe_path': jobe,
                    'jobo_path': jobo,
                    'sbatch_command': cmd,
                }

                save_job_ids.append(jobid)

        if flag_full_workflow:
            if save_job_ids:
                print(f"\n\t{len(list_files)} jobs submitted - {particle} {set_type}. "
                      f"From jobid {save_job_ids[0]} - {save_job_ids[-1]}\n")
            else:
                # Nothing was batched for this set (e.g. empty training or testing list):
                # there is no jobid range to report.
                print(f"\n\t{len(list_files)} jobs submitted - {particle} {set_type}.\n")
            time.sleep(1)  # Avoid collapsing LP cluster

    # copy this script itself into logs
    shutil.copyfile(__file__, os.path.join(running_dir, os.path.basename(__file__)))

    # copy config file into logs
    if config_rta_file is not None:
        shutil.copy(config_rta_file, os.path.join(running_dir, os.path.basename(config_rta_file)))
    if lst_config is not None:
        shutil.copy(lst_config, os.path.join(running_dir, os.path.basename(lst_config)))

    # save file lists into logs
    shutil.move('testing.list', os.path.join(running_dir, 'testing.list'))
    shutil.move('training.list', os.path.join(running_dir, 'training.list'))

    # create log dictionary and return it if IN workflow mode
    if flag_full_workflow:
        return jobid2log, jobids_RTA_r0_dl1_reorganized
    else:
        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")