Python check_and_make_dir Exemples, lstmcpipe.data_management.check_and_make_dir Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : onsite_mc_dl1_to_dl2.py Projet : garciagenrique/LST_scripts

def main(input_dir,
         path_models,
         config_file,
         flag_full_workflow=False,
         particle=None,
         wait_jobid_train_pipe=None,
         wait_jobids_merge=None,
         dictionary_with_dl1_paths=None,
         source_environment=None):
    """
    Convert onsite files from dl1 to dl2.

    Runs `lstchain_dl1_to_dl2` once per dl1 file, either interactively
    (`os.system`) or as batched slurm jobs when run as part of the full workflow.
    The output directory is `input_dir` with 'DL1' replaced by 'DL2'.

    Parameters
    ----------
    input_dir : str
        path to the files directory to analyse
    path_models : str
        path to the trained models
    config_file : str
        Path to a configuration file. If None, no `-c` option is passed to the command
        and no configuration file is copied to the output directory.
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow that converts r0 to dl2 files.
    particle : str
        Type of particle used to create the log and dictionary
            ! COMPULSORY argument when flag_full_workflow is set to True.
    wait_jobid_train_pipe : str
        a string with the batched jobid from the train stage to indicate the
        dependencies of the job to be batched. An empty string or None means
        "no dependency on the train stage".
    wait_jobids_merge : str
        string with merge_and_copy jobids. An empty string or None means
        "no dependency on the merge stage".
    dictionary_with_dl1_paths : dict
        Dictionary with 'particles' as keys containing final outnames of dl1 files.
            ! COMPULSORY argument when flag_full_workflow is set to True.
    source_environment : str
        Shell snippet prepended verbatim to the command (typically sourcing a .bashrc and
        activating a conda environment). Nothing is prepended when None.
        ! NOTE : train_pipe AND dl1_to_dl2 MUST BE RUN WITH THE SAME ENVIRONMENT

    Returns
    -------
    log_dl1_to_dl2 : dict
        dictionary of dictionaries containing the jobid of the batched job as key and the batched
        command as value, plus a 'dl2_test_path' entry with the expected dl2 testing file.

        ****  otherwise : (if flag_full_workflow is False, by default) ****
        None is returned -- THIS IS APPLIED FOR THE ARGUMENTS SHOWN BELOW TOO

    return_jobids : str
        comma separated jobids of the batched jobs, to be sent (for dependencies purposes) to the
        next stage of the workflow (dl2_to_irfs)

    """

    output_dir = input_dir.replace('DL1', 'DL2')

    if flag_full_workflow:

        check_and_make_dir_without_verification(output_dir)
        print(f"\tOutput dir {particle}: {output_dir}")

        log_dl1_to_dl2 = {particle: {}}

        # path to dl1 files by particle type
        dl1_paths = dictionary_with_dl1_paths[particle]
        file_list = [
            dl1_paths['training']['train_path_and_outname_dl1'],
            dl1_paths['testing']['test_path_and_outname_dl1'],
        ]

        return_jobids = []

        # Keep only the job ids that were actually provided; empty strings and
        # None are both treated as "no dependency".
        wait_jobs = ','.join(
            jobid for jobid in (wait_jobid_train_pipe, wait_jobids_merge)
            if jobid)

        job_name = {
            'electron': 'dl1-2_e',
            'gamma': 'dl1-2_g',
            'gamma-diffuse': 'dl1-2_gd',
            'proton': 'dl1-2_p',
            'gamma_off0.0deg': 'g.0_1-2dl',
            'gamma_off0.4deg': 'g.4_1-2dl'
        }

    else:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

        check_and_make_dir(output_dir)
        print(f"Output dir: {output_dir}")

        file_list = [
            os.path.join(input_dir, f) for f in os.listdir(input_dir)
            if f.startswith('dl1_')
        ]

        query_continue(f"{len(file_list)} jobs,  ok?")

    for dl1_file in file_list:

        cmd = ''
        if source_environment is not None:
            cmd += source_environment
        cmd += f'lstchain_dl1_to_dl2 -f {dl1_file} -p {path_models} -o {output_dir}'

        if config_file is not None:
            cmd += f' -c {config_file}'

        if not flag_full_workflow:  # Run interactively
            os.system(cmd)
            continue

        # flag_full_workflow == True : batch the job
        if 'training' in dl1_file:
            ftype = 'train'
        elif 'testing' in dl1_file:
            ftype = 'test'
        else:
            ftype = '-'

        jobe = os.path.join(output_dir, f"dl1_dl2_{particle}_{ftype}job.e")
        jobo = os.path.join(output_dir, f"dl1_dl2_{particle}_{ftype}job.o")

        batch_cmd = 'sbatch --parsable -p short'
        if wait_jobs:
            # Only declare a dependency when there is something to wait for;
            # an empty `afterok:` list is not a valid dependency.
            batch_cmd += f' --dependency=afterok:{wait_jobs}'
        batch_cmd += f' -J {job_name[particle]} -e {jobe} -o {jobo} --wrap="{cmd}"'

        jobid_dl1_to_dl2 = os.popen(batch_cmd).read().strip('\n')

        log_dl1_to_dl2[particle][jobid_dl1_to_dl2] = batch_cmd
        if 'testing' in dl1_file:
            # The dl2 file does not exist yet; its path is derived from the dl1 one.
            log_dl1_to_dl2[particle]['dl2_test_path'] = dl1_file.replace(
                '/DL1/', '/DL2/').replace('dl1_', 'dl2_')
        return_jobids.append(jobid_dl1_to_dl2)

    # copy this script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(output_dir, os.path.basename(__file__)))
    if config_file is not None:
        shutil.copyfile(
            config_file, os.path.join(output_dir,
                                      os.path.basename(config_file)))

    if not flag_full_workflow:
        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")
    else:
        return_jobids = ','.join(return_jobids)

        return log_dl1_to_dl2, return_jobids

Exemple #2

0

Afficher le fichier

Fichier : onsite_mc_train.py Projet : garciagenrique/LST_scripts

def main(gamma_dl1_train_file,
         proton_dl1_train_file,
         config_file=None,
         source_environment=source_env,
         flag_full_workflow=False,
         wait_ids_proton_and_gammas=None):
    """
    Train RF from dl1 data  (onsite LaPalma cluster)

    Batches a `lstchain_mc_trainpipe` job through `sbatch`. The models directory is
    derived from the directory of the proton dl1 file.

    Parameters
    ----------
    gamma_dl1_train_file: str
        path to the gamma file
    proton_dl1_train_file: str
        path to the proton file
    config_file: str
        Path to a configuration file. If None, no `-c` option is passed to the command
        and no configuration file is copied to the models directory.
    source_environment : str
        Shell snippet prepended verbatim to the train command (typically sourcing a .bashrc and
        activating a conda environment). Nothing is prepended when None.
        ! NOTE : train_pipe AND dl1_to_dl2 MUST BE RUN WITH THE SAME ENVIRONMENT
    flag_full_workflow :  bool
        Boolean flag to indicate if this script is run as part of the workflow that converts r0 to dl2 files.
    wait_ids_proton_and_gammas : str
        a string (of chained jobids separated by ',' and without spaces between each element), to indicate the
        dependencies of the job to be batched. None or an empty string means the job is batched
        without any dependency.


    Returns
    -------
    log_train : dict (if flag_full_workflow is True)
        dictionary containing the jobid of the batched job as key and the batched command as value.

        ****  otherwise : (if flag_full_workflow is False, by default) ****
        None is returned -- THIS IS APPLIED FOR THE ARGUMENTS SHOWN BELOW TOO

    jobid_train : str (if flag_full_workflow is True)
        jobid of the batched job to be send (for dependencies purposes) to the next stage of the
        workflow (onsite_mc_dl1_to_dl2)

    models_dir : str (if flag_full_workflow is True)
        Path with the directory where the models are stored

    """

    if flag_full_workflow:
        log_train = {}
    else:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

    dl1_proton_dir = os.path.dirname(os.path.abspath(proton_dl1_train_file))

    # check if the path follows the established paths (lstchain-like) or not (rta-like)
    # NOTE: a containment test is used so that a match at index 0 is not missed.
    if '/mc/DL1/' in dl1_proton_dir:
        models_dir = dl1_proton_dir.replace('/mc/DL1', '/models')
    else:
        models_dir = dl1_proton_dir.replace('/DL1', '/models')
    models_dir = models_dir.replace('/proton/', '/')

    print(f"\tModels will be placed in {models_dir}")
    if flag_full_workflow:
        check_and_make_dir_without_verification(models_dir)
    else:
        check_and_make_dir(models_dir)

    base_cmd = ''
    if source_environment is not None:
        base_cmd += source_environment
    base_cmd += f'lstchain_mc_trainpipe --fg {os.path.abspath(gamma_dl1_train_file)}' \
                f' --fp {os.path.abspath(proton_dl1_train_file)} -o {models_dir}'

    if config_file is not None:
        base_cmd += f' -c {config_file}'

    jobo = os.path.join(models_dir, "train_job.o")
    jobe = os.path.join(models_dir, "train_job.e")

    if not flag_full_workflow:
        cmd = f'sbatch -p long -e {jobe} -o {jobo} --wrap="{base_cmd}" '

        print(cmd)
        os.system(cmd)

    else:  # flag_full_workflow == True !
        cmd = 'sbatch --parsable -p long'
        # The argument defaults to None, so test truthiness rather than `!= ''`;
        # no dependency is declared when there is nothing to wait for.
        if wait_ids_proton_and_gammas:
            cmd += ' --dependency=afterok:' + wait_ids_proton_and_gammas
        cmd += f' -J train_pipe -e {jobe} -o {jobo} --wrap="{base_cmd}" '

        jobid_train = os.popen(cmd).read().strip('\n')
        log_train[jobid_train] = cmd

    # copy this script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(models_dir, os.path.basename(__file__)))
    if config_file is not None:
        shutil.copyfile(
            config_file, os.path.join(models_dir,
                                      os.path.basename(config_file)))

    if not flag_full_workflow:
        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")
    else:
        return log_train, jobid_train, models_dir

Exemple #3

0

Afficher le fichier

def main(dl2_directory,
         config_file,
         irf_point_like=True,
         irf_gamma_offset='0.0deg',
         source_env=None,
         flag_full_workflow=False,
         log_from_dl1_dl2={},
         wait_jobs_dl1dl2=None,
         prod_id=None):
    """
    Batches/runs interactively the lstchain `lstchain_create_irf_files` entry point.
    Last stage of the MC prod workflow.

    Parameters
    ----------
    dl2_directory: str
        General path to DL2 directory, not formatted with the particle.
    config_file: str
        Path to a configuration file. If None or an empty string, no `--config` option is
        passed to the command and no configuration file is copied to the IRF directory.
    irf_point_like: bool
        MC prod configuration argument to create IRFs: {True: gamma, False: gamma-diffuse}.
    irf_gamma_offset: str
        MC prod configuration argument to create IRFs: 0.0deg (for ON/OFF obs) or 0.4deg (for wobble obs).
        Any other value prints an error message and exits with status -1.
    source_env: str
        Shell snippet placed in front of the batched command (typically sourcing a .bashrc and
        activating a conda environment). Nothing is prepended when None or empty.
    flag_full_workflow: bool
        Boolean flag to indicate if this script is run as part of the workflow that converts r0 to dl2 files.
    log_from_dl1_dl2: dict
        Dictionary with dl2 output path. Files are not yet here, but path and full name are needed to batch the job.
        Only read, never modified. When empty, the dl2 files are looked up with `check_dl2_files`.
    wait_jobs_dl1dl2: str
        Comma separated string with the job ids of previous stages (dl1_to_dl2 stage) to be passed as dependencies to
        the create_irfs_files job to be batched. No dependency is declared when None or empty.
    prod_id: str
        prod_id defined within config_MC_prod.yml file

    Returns
    -------
    log_dl2_to_irfs: dict
        Dictionary-wise log containing {'job_id': 'batched_cmd'}
    list_job_id_dl2_irfs: list
        Job-ids of the batched job to be passed to the last (MC prod check) stage of the workflow.

    Both values are returned only if flag_full_workflow is True; None is returned otherwise.
    """
    allowed_gamma_off = ['0.0deg', '0.4deg']
    if irf_gamma_offset not in allowed_gamma_off:
        print(
            f'Please select a valid gamma_offset to compute the IRFS: {" or ".join(allowed_gamma_off)}'
        )
        # Same effect as `exit(-1)`, without relying on the `site`-provided builtin.
        raise SystemExit(-1)

    output_irfs_dir = dl2_directory.replace('/DL2/',
                                            '/IRF/').replace('/{}/', '/')

    log_dl2_to_irfs = {}
    list_job_id_dl2_irfs = []

    if not flag_full_workflow or not log_from_dl1_dl2:
        dl2_particle_paths = check_dl2_files(dl2_directory, irf_point_like,
                                             irf_gamma_offset)

        # First key that is either 'gamma...' or 'gamma-diffuse...'
        gamma_kind = [
            g for g in dl2_particle_paths if g.startswith('gamma')
        ][0]

        gamma_file = dl2_particle_paths[gamma_kind]
        proton_file = dl2_particle_paths['proton']
        electron_file = dl2_particle_paths['electron']

        name_tag_slurm = gamma_kind

    else:
        proton_file = log_from_dl1_dl2['proton']['dl2_test_path']
        electron_file = log_from_dl1_dl2['electron']['dl2_test_path']

        if irf_point_like:
            # irf_gamma_offset was validated above, so the key always exists in the
            # expected naming scheme ('gamma_off0.0deg' / 'gamma_off0.4deg').
            gamma_file = log_from_dl1_dl2[f'gamma_off{irf_gamma_offset}'][
                'dl2_test_path']
            name_tag_slurm = irf_gamma_offset
        else:
            gamma_file = log_from_dl1_dl2['gamma-diffuse']['dl2_test_path']
            name_tag_slurm = 'diffuse'

    point_like = '--point-like' if irf_point_like else ''

    # Final outfile name with IRF kind
    if prod_id is None:
        irf_basename = 'irf.fits.gz'
    elif irf_point_like:
        irf_basename = ('irf_' + prod_id.replace('.', '') +
                        f'gamma_point-like_off{irf_gamma_offset.replace(".", "")}.fits.gz')
    else:
        irf_basename = 'irf_' + prod_id.replace('.', '') + 'gamma_diffuse.fits.gz'
    output_filename_irf = os.path.join(output_irfs_dir, irf_basename)

    cmd = f'lstchain_create_irf_files {point_like} -g {gamma_file} -p {proton_file} -e {electron_file}' \
          f' -o {output_filename_irf}'
    if config_file:
        cmd += f' --config={config_file}'

    if not flag_full_workflow:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

        check_and_make_dir(output_irfs_dir)
        os.system(cmd)

        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")

    else:  # flag_full_workflow == True !
        print(f'\tOutput dir IRFs: {output_irfs_dir}')

        check_and_make_dir_without_verification(output_irfs_dir)

        jobe = os.path.join(output_irfs_dir, "job_dl2_to_irfs.e")
        jobo = os.path.join(output_irfs_dir, "job_dl2_to_irfs.o")

        # Avoid interpolating the literal text 'None' into the shell command.
        env_prefix = f'{source_env} ' if source_env else ''

        batch_cmd = 'sbatch --parsable -p short'
        if wait_jobs_dl1dl2:
            batch_cmd += f' --dependency=afterok:{wait_jobs_dl1dl2}'
        batch_cmd += f' -J MC_IRF_{name_tag_slurm}' \
                     f' -e {jobe} -o {jobo} --wrap="{env_prefix}{cmd}"'

        job_id_dl2_irfs = os.popen(batch_cmd).read().strip('\n')

        log_dl2_to_irfs[job_id_dl2_irfs] = batch_cmd
        list_job_id_dl2_irfs.append(job_id_dl2_irfs)

    # Copy Script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(output_irfs_dir, os.path.basename(__file__)))
    # The previous `is not None or is not ''` test was always true and crashed
    # on a missing config file; copy only when a config file was really given.
    if config_file:
        shutil.copyfile(
            config_file,
            os.path.join(output_irfs_dir, os.path.basename(config_file)))

    if flag_full_workflow:
        return log_dl2_to_irfs, list_job_id_dl2_irfs

Exemple #4

0

Afficher le fichier

Fichier : onsite_mc_merge_and_copy_dl1.py Projet : garciagenrique/LST_scripts

def main(input_dir,
         flag_full_workflow=False,
         particle2jobs_dict={},
         particle=None,
         flag_merge=False,
         flag_no_image=True,
         prod_id=None,
         gamma_offset=None,
         source_environment=None):
    """
    Merge and copy DL1 data after production.

        1. check job_logs
        2. check that all files have been created in DL1 based on training and testing lists
        3. merge DL1 files
        4. move DL1 files in final place (and copy the config files there)
        5. move running_dir

    In the full workflow, steps 3 to 5 are batched as chained slurm jobs instead of being
    run interactively.


    Parameters
    ----------
    input_dir : str
        path to the DL1 files directory to merge, copy and move.  Compulsory argument.

    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow that converts r0 to dl2 files.

    particle2jobs_dict : dict
        Dictionary used to retrieve the r0 to dl1 jobids that were sent in the previous step of the r0-dl3 workflow.
        Only read, never modified.
        COMPULSORY argument when flag_full_workflow is set to True.

    particle : str
        Type of particle used to create the log and dictionary
        COMPULSORY argument when flag_full_workflow is set to True.

    flag_merge : bool
        Value passed to the `--smart` argument of `lstchain_merge_hdf5_files`.
        Default set to False.

    flag_no_image : bool
        Value passed to the `--no-image` argument of `lstchain_merge_hdf5_files`.
        Default set to True.

    prod_id : str
        prod_id for output filename.
    gamma_offset : str
        if gamma files have various off0.Xdeg observations, include the offset within the filename for completeness.
    source_environment : str
        Shell snippet placed in front of the batched merge command (typically sourcing a .bashrc and
        activating a conda environment).
        ! NOTE : train_pipe AND dl1_to_dl2 **MUST** be run with the same environment.

    Returns
    -------

    log_merge : dict (if flag_full_workflow is True)
        dictionary of dictionaries containing the log information of this script and the jobid of the batched job,
        separated by particle

         - log_merge[particle][set_type] holds 'train_path_and_outname_dl1' or 'test_path_and_outname_dl1'
           plus one {jobid: batched command} entry per batched job. The move/copy jobs are logged
           under the 'training' set.

        ****  otherwise : (if flag_full_workflow is False, by default) ****
        None is returned -- THIS IS APPLIED FOR THE ARGUMENTS SHOWN BELOW TOO

    return_jobids4train : str (if flag_full_workflow is True)
        jobid of the batched job to be send (for dependencies purposes) to the next stage of the workflow
        (train_pipe), by particle

    return_jobids_debug : str
        jobids to store in log_reduced.txt - Mainly debug purposes.

    """

    if flag_full_workflow:
        log_merge = {particle: {'training': {}, 'testing': {}}}

        wait_r0_dl1_jobs = particle2jobs_dict[particle]

        return_jobids4train = []
        return_jobids_debug = []

        job_name = {
            'electron': 'e_merge',
            'gamma': 'g_merge',
            'gamma-diffuse': 'gd_merge',
            'proton': 'p_merge',
            'gamma_off0.0deg': 'g0.0_merge',
            'gamma_off0.4deg': 'g0.4_merge'
        }

    else:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")

    JOB_LOGS = os.path.join(input_dir, 'job_logs')
    training_filelist = os.path.join(input_dir, 'training.list')
    testing_filelist = os.path.join(input_dir, 'testing.list')
    running_DL1_dir = os.path.join(input_dir, 'DL1')
    DL1_training_dir = os.path.join(running_DL1_dir, 'training')
    DL1_testing_dir = os.path.join(running_DL1_dir, 'testing')
    final_DL1_dir = input_dir.replace('running_analysis', 'DL1')
    logs_destination_dir = input_dir.replace('running_analysis',
                                             'analysis_logs')

    # 1. check job logs
    check_job_logs(JOB_LOGS)

    # 2. check that all files have been created in DL1 based on training and testing lists
    # just check number of files first:
    if len(os.listdir(DL1_training_dir)) != len(
            read_lines_file(training_filelist)):
        tf = check_files_in_dir_from_file(DL1_training_dir, training_filelist)
        if tf != [] and not flag_full_workflow:
            query_continue(
                "{} files from the training list are not in the `DL1/training` directory:\n{} "
                "Continue ?".format(len(tf), tf))

    if len(os.listdir(DL1_testing_dir)) != len(
            read_lines_file(testing_filelist)):
        tf = check_files_in_dir_from_file(DL1_testing_dir, testing_filelist)
        if tf != [] and not flag_full_workflow:
            query_continue(
                "{} files from the testing list are not in the `DL1/testing directory:\n{} "
                "Continue ?".format(len(tf), tf))

    if not flag_full_workflow:

        print("\tmerging starts")

        # 3. merge DL1 files
        for set_type in ['testing', 'training']:
            tdir = os.path.join(running_DL1_dir, set_type)

            # dl1 files must already exist; the merged name is built from the first one found
            output_filename = os.listdir(tdir)[0]
            output_filename = 'dl1_' + os.path.basename(
                output_filename.split('_run')[0])

            if particle == 'gamma-diffuse':
                output_filename = output_filename.replace(
                    'gamma', 'gamma-diffuse')
            if gamma_offset is not None:
                output_filename += f'_{gamma_offset}'
            if prod_id is not None:
                output_filename += f'_{prod_id}'
            output_filename += f'_{set_type}.h5'

            output_filename = os.path.join(running_DL1_dir, output_filename)
            print(f"\t\tmerge output: {output_filename}")

            # 3.1 run the merge interactively
            cmd = f"lstchain_merge_hdf5_files -d {tdir} -o {output_filename} --no-image {flag_no_image} " \
                  f"--smart {flag_merge}"
            os.system(cmd)

        # 4. move DL1 files in final place
        check_and_make_dir(final_DL1_dir)
        move_dir_content(running_DL1_dir, final_DL1_dir)
        print(f"\tDL1 files have been moved to {final_DL1_dir}")

        # copy lstchain config file there too. HiPeRTA configs are *.txt
        config_files = [
            os.path.join(input_dir, f) for f in os.listdir(input_dir)
            if f.endswith(('.json', '.txt'))
        ]
        for config_path in config_files:
            shutil.copyfile(
                config_path,
                os.path.join(final_DL1_dir, os.path.basename(config_path)))

        # 5. move running_dir as logs
        check_and_make_dir(logs_destination_dir)
        move_dir_content(input_dir, logs_destination_dir)
        print(f"\tLOGS have been moved to {logs_destination_dir}")

        print(f"\n ==== END {os.path.basename(__file__)} ==== \n")

    else:  # flag_full_workflow == True !

        print(f"\n\tmerging starts - {particle}")

        # 3. merge DL1 files
        wait_both_merges = []

        for set_type in ['testing', 'training']:
            tdir = os.path.join(running_DL1_dir, set_type)

            # just need to take the base name of the file, so we read a processed bunch and take first file
            with open(training_filelist, 'r') as f:
                output_filename = f.readline()

            output_filename = 'dl1_' + os.path.basename(
                output_filename.split('_run')[0])
            if particle == 'gamma-diffuse':
                output_filename = output_filename.replace(
                    'gamma', 'gamma-diffuse')
            if '_off' in particle:
                output_filename += f'_{gamma_offset}'
            output_filename += f'_{prod_id}_{set_type}'
            output_filename += '.h5'

            output_filename = os.path.join(running_DL1_dir, output_filename)
            print(f"\t\tmerge output: {output_filename}")

            # After the workflow the files will be moved, will not stay at output_filename
            final_outname = os.path.join(final_DL1_dir,
                                         os.path.basename(output_filename))
            if set_type == 'training':
                log_merge[particle][set_type][
                    'train_path_and_outname_dl1'] = final_outname
            else:
                log_merge[particle][set_type][
                    'test_path_and_outname_dl1'] = final_outname

            cmd = 'sbatch --parsable -p short'
            if wait_r0_dl1_jobs != '':
                cmd += ' --dependency=afterok:' + wait_r0_dl1_jobs

            # stderr goes to the `.e` file and stdout to the `.o` file, as for
            # every other batched job (they used to be swapped here).
            cmd += f' -J {job_name[particle]} -e slurm-{job_name[particle]}-{set_type}.e ' \
                   f'-o slurm-{job_name[particle]}-{set_type}.o --wrap="{source_environment} ' \
                   f'lstchain_merge_hdf5_files -d {tdir} -o {output_filename} --no-image {flag_no_image} ' \
                   f'--smart {flag_merge}"'

            jobid_merge = os.popen(cmd).read().strip('\n')
            log_merge[particle][set_type][jobid_merge] = cmd

            print(
                f'\t\tSubmitted batch job {jobid_merge} -- {particle}, {set_type}'
            )

            wait_both_merges.append(jobid_merge)
            return_jobids_debug.append(jobid_merge)

        # Out of testing/training loop !

        # 4., 5. & 6. in the case of the full workflow are done in a separate sbatch to wait merge, the three steps:
        # 4 --> move DL1 files in final place
        # 5 --> copy lstchain config file in final_dir too
        # 6 --> move running_dir as logs

        print(f"\tDL1 files will be moved to {final_DL1_dir}")

        base_cmd = 'sbatch --parsable -p short -J {} -e {} -o {} --dependency=afterok:{} ' \
                   '--wrap="python batch_dl1_utils-merge_and_copy.py -s {} -d {} --copy_conf {}"'

        wait_both_merges = ','.join(wait_both_merges)

        # Short particle tag used in the job names and slurm log names below.
        short_name = job_name[particle].split('_')[0]

        # The move/copy jobs are logged under the 'training' set. This was
        # previously implicit, through the loop variable left over from the
        # merge loop above.
        post_merge_log = log_merge[particle]['training']

        # 4 --> move DL1 files in final place
        batch_mv_dl1 = base_cmd.format(
            short_name + '_mv_dl1',
            f'slurm-{short_name}_mv_DL1_files.e',
            f'slurm-{short_name}_mv_DL1_files.o',
            wait_both_merges, running_DL1_dir, final_DL1_dir, 'False')

        jobid_move_dl1 = os.popen(batch_mv_dl1).read().strip('\n')
        post_merge_log[jobid_move_dl1] = batch_mv_dl1

        print(
            f'\t\tSubmitted batch job {jobid_move_dl1}. It will move dl1 files when {wait_both_merges} finish.'
        )

        # 5 --> copy lstchain config file in final_dir too
        batch_copy_conf = base_cmd.format(
            short_name + '_cp_conf',
            f'slurm-{short_name}_cp_config.e',
            f'slurm-{short_name}_cp_config.o',
            jobid_move_dl1, input_dir, final_DL1_dir, 'True')

        jobid_copy_conf = os.popen(batch_copy_conf).read().strip('\n')
        post_merge_log[jobid_copy_conf] = batch_copy_conf

        print(
            f'\t\tSubmitted batch job {jobid_copy_conf}. It will copy the used config when {jobid_move_dl1} finish.'
        )

        # 6 --> move running_dir to final analysis_logs
        batch_mv_dir = base_cmd.format(
            short_name + '_mv_dir',
            f'slurm-{short_name}_mv_DL1_direct.e',
            f'slurm-{short_name}_mv_DL1_direct.o',
            jobid_copy_conf, input_dir, logs_destination_dir, 'False')

        jobid_move_log = os.popen(batch_mv_dir).read().strip('\n')
        post_merge_log[jobid_move_log] = batch_mv_dir

        print(
            f'\t\tSubmitted batch job {jobid_move_log}. It will move running_dir when {jobid_copy_conf} finish.'
        )

        return_jobids4train.append(jobid_move_dl1)

        return_jobids_debug.append(jobid_move_dl1)
        return_jobids_debug.append(jobid_move_log)
        return_jobids_debug.append(jobid_copy_conf)

        print(f"\tLOGS will be moved to {logs_destination_dir}")

        # Little clarification (it will not be clear in log). These keys are stored here for 2 purposes:
        # 1 - train_pipe recovers the final dl1 names and path.
        # 2 - dl1_to_dl2 recovers the jobids of the merged dl1 files; (all dl1 files MUST be merged and moved
        # to dl1_dir), so instead of storing the jobid that merges all the *particle*_dl1 (jobid_merge), the
        # jobid that moves the final dl1 file to dl1_dir is stored. Until that step has finished, the workflow
        # cannot continue.

        return_jobids4train = ','.join(return_jobids4train)
        return_jobids_debug = ','.join(return_jobids_debug)

        return log_merge, return_jobids4train, return_jobids_debug

Exemple #5

0

Afficher le fichier

Fichier : onsite_mc_r0_to_dl1.py Projet : garciagenrique/LST_scripts

def main(input_dir,
         config_file=None,
         train_test_ratio=0.5,
         random_seed=42,
         n_r0_files_per_dl1_job=0,
         flag_full_workflow=False,
         particle=None,
         prod_id=None,
         source_environment=None,
         offset=None):
    """
    R0 to DL1 MC onsite conversion.

    The r0 files found in `input_dir` are shuffled, split into a training and a
    testing set, written into sub-lists and one `lstchain_mc_r0_to_dl1` job is
    submitted with `sbatch` per sub-list file found in the list directory.

    Parameters
    ----------
    input_dir : str
        path to the files directory to analyse
    config_file : str
        Path to a configuration file. If None, no `-c` option is passed to `lstchain_mc_r0_to_dl1`.
    train_test_ratio : float
        Ratio of training data. Default = 0.5
    random_seed : int
        Random seed used to shuffle the input file list. Default = 42
    n_r0_files_per_dl1_job : int
        Number of r0 files processed by each r0_to_dl1 batched job.

        If fewer than 100 r0 files are found in `input_dir`, it is considered to be a test on a small
        production and 10 files per job are used, whatever the value of this argument.

        Otherwise, if set to 0 (Default), the value is chosen from the particle name found in `input_dir`:
        50 for gamma-diffuse, 25 for gamma, 50 for electron, 125 for proton and 50 for anything else.

        Default = 0
    particle : str
        particle type for `flag_full_workflow` = True (used as key for the slurm job name).
    offset : str
        gamma offset. Only used when `flag_full_workflow` is True and `particle` contains 'off'.
    prod_id : str
        Production ID. If None, _v00 will be used, indicating an official base production. Default = None.
        When `flag_full_workflow` is True, the full production ID must be passed and is used unchanged.
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow that converts r0 to dl2 files.
    source_environment : str
        path to a .bashrc file to source (can be configurable for custom runs @ mc_r0_to_dl3 script)
         to activate a certain conda environment.
         DEFAULT: `source /fefs/aswg/software/virtual_env/.bashrc; conda activate cta`.
        ! NOTE : train_pipe AND dl1_to_dl2 **MUST** be run with the same environment.

    Returns
    -------
    jobid2log : dict (only if flag_full_workflow is True)
        A dictionary of dictionaries, keyed by the jobid of every job that was batched:

             dict[jobid].keys() = ['particle', 'set_type', 'jobe_path', 'jobo_path', 'sbatch_command']

    jobids_r0_dl1 : list (only if flag_full_workflow is True)
        A list of all the jobids sent for the particle (including test and train set types).

    If flag_full_workflow is False (default), None is returned.
    """
    if not flag_full_workflow:
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")
        # This formatting should be the same as in `onsite_mc_r0_to_dl3.py`
        today = calendar.datetime.date.today()
        base_prod_id = f'{today.year:04d}{today.month:02d}{today.day:02d}_v{lstchain.__version__}'
        suffix_id = '_v00' if prod_id is None else f'_{prod_id}'
        full_prod_id = base_prod_id + suffix_id
    else:
        # Full prod_id is passed as argument
        full_prod_id = prod_id

    train_ratio = float(train_test_ratio)
    dl0_data_dir = input_dir

    if source_environment is not None:
        manage_source_env_r0_dl1(source_and_env=source_environment,
                                 file=os.path.abspath('./core_list.sh'))

    ##############################################################################

    print(f"Working on DL0 files in {dl0_data_dir}")

    check_data_path(dl0_data_dir)

    raw_files_list = get_input_filelist(dl0_data_dir)

    if len(raw_files_list) < 100:
        n_r0_per_dl1_job = 10
    elif n_r0_files_per_dl1_job == 0:
        # 'gamma-diffuse' has to be tested BEFORE 'gamma': the latter is a substring of the former,
        # so the reverse order makes the gamma-diffuse case unreachable.
        if 'gamma-diffuse' in input_dir:
            n_r0_per_dl1_job = 50
        elif 'gamma' in input_dir:
            n_r0_per_dl1_job = 25
        elif 'electron' in input_dir:
            n_r0_per_dl1_job = 50
        elif 'proton' in input_dir:
            n_r0_per_dl1_job = 125
        else:
            n_r0_per_dl1_job = 50
    else:
        n_r0_per_dl1_job = n_r0_files_per_dl1_job

    random.seed(random_seed)
    random.shuffle(raw_files_list)

    number_files = len(raw_files_list)
    ntrain = int(number_files * train_ratio)
    ntest = number_files - ntrain

    training_list = raw_files_list[:ntrain]
    testing_list = raw_files_list[ntrain:]

    print(f"\t{number_files} raw files")
    print(f"\t{ntrain} files in training dataset")
    print(f"\t{ntest} files in test dataset")

    # The full lists are first written in the current directory and moved to the running dir at the end.
    with open('training.list', 'w') as newfile:
        newfile.writelines(f'{name}\n' for name in training_list)

    with open('testing.list', 'w') as newfile:
        newfile.writelines(f'{name}\n' for name in testing_list)

    if flag_full_workflow and 'off' in particle:
        # join(BASE_PATH, 'DL0', OBS_DATE, '{particle}', ZENITH, POINTING, 'PLACE_4_PROD_ID', GAMMA_OFF)
        dl0_data_dir = dl0_data_dir.split(offset)[0]  # Take out /off0.Xdeg
        running_dir = os.path.join(
            dl0_data_dir.replace('DL0', 'running_analysis'), full_prod_id, offset)
    else:
        running_dir = os.path.join(
            dl0_data_dir.replace('DL0', 'running_analysis'), full_prod_id)

    job_logs = os.path.join(running_dir, 'job_logs')
    dl1_data_dir = os.path.join(running_dir, 'DL1')

    print("\tRUNNING_DIR: \t", running_dir)
    print("\tJOB_LOGS DIR: \t", job_logs)
    print("\tDL1 DATA DIR: \t", dl1_data_dir)

    # In workflow mode the directories are created without the interactive verification.
    make_dir = check_and_make_dir_without_verification if flag_full_workflow else check_and_make_dir

    for directory in [running_dir, dl1_data_dir, job_logs]:
        make_dir(directory)

    # dumping the training and testing lists and splitting them in sub-lists for parallel jobs

    jobid2log = {}
    jobids_r0_dl1 = []

    # Loop invariants of the job submission
    job_name = {
        'electron': 'r0dl1_e',
        'gamma': 'r0dl1_g',
        'gamma-diffuse': 'r0dl1_gd',
        'proton': 'r0dl1_p',
        'gamma_off0.0deg': 'g0.0_r0dl1',
        'gamma_off0.4deg': 'g0.4_r0dl1'
    }
    queue = 'long'  # TODO change to short (all particles but protons) after prod5 check
    cc = f' -c {config_file}' if config_file is not None else ' '

    for set_type, list_type, log_tag in (('training', training_list, 'train'),
                                         ('testing', testing_list, 'test')):
        dir_lists = os.path.join(running_dir, 'file_lists_' + set_type)
        output_dir = os.path.join(running_dir, 'DL1', set_type)

        make_dir(dir_lists)
        make_dir(output_dir)

        print("\toutput dir: \t", output_dir)

        sublists = [list_type[start:start + n_r0_per_dl1_job]
                    for start in range(0, len(list_type), n_r0_per_dl1_job)]
        number_of_sublists = len(sublists)
        for i, sublist in enumerate(sublists):
            output_file = os.path.join(dir_lists, f'{set_type}_{i}.list')
            with open(output_file, 'w') as out:
                out.writelines(f'{line}\n' for line in sublist)
        print(f'\t{number_of_sublists} files generated for {set_type} list')

        # LSTCHAIN #
        save_job_ids = []
        base_cmd = f'core_list.sh "lstchain_mc_r0_to_dl1 -o {output_dir} {cc}"'

        # One job is batched per file present in the list directory
        list_files = os.listdir(dir_lists)
        for counter, list_file in enumerate(list_files):
            jobo = os.path.join(job_logs, f"job{counter}_{log_tag}.o")
            jobe = os.path.join(job_logs, f"job{counter}_{log_tag}.e")
            list_path = os.path.join(dir_lists, list_file)

            # recover or not the jobid depending of the workflow mode
            if not flag_full_workflow:  # Run interactively
                cmd = f'sbatch -p short -e {jobe} -o {jobo} {base_cmd} {list_path}'
                os.system(cmd)
                continue

            # flag_full_workflow == True !
            cmd = f'sbatch --parsable -p {queue} -J {job_name[particle]} ' \
                  f'-e {jobe} -o {jobo} {base_cmd} {list_path}'

            # `--parsable` makes sbatch print the jobid on stdout
            jobid = os.popen(cmd).read().strip('\n')
            jobids_r0_dl1.append(jobid)
            save_job_ids.append(jobid)

            # Fill the dictionaries if IN workflow mode
            jobid2log[jobid] = {
                'particle': particle,
                'set_type': set_type,
                'jobe_path': jobe,
                'jobo_path': jobo,
                'sbatch_command': cmd,
            }

        if flag_full_workflow:
            n_jobs = len(list_files)
            if save_job_ids:
                print(f"\n\t{n_jobs} jobs submitted - {particle} {set_type}. "
                      f"From jobid {save_job_ids[0]} - {save_job_ids[-1]}\n")
            else:
                # An empty set (e.g. extreme train_test_ratio) must not crash the workflow
                print(f"\n\t{n_jobs} jobs submitted - {particle} {set_type}.\n")
            time.sleep(1)  # Avoid collapsing LP cluster

    # copy this script and config into working dir
    shutil.copyfile(__file__,
                    os.path.join(running_dir, os.path.basename(__file__)))
    if config_file is not None:
        shutil.copyfile(
            config_file,
            os.path.join(running_dir, os.path.basename(config_file)))

    # save file lists into logs
    shutil.move('testing.list', os.path.join(running_dir, 'testing.list'))
    shutil.move('training.list', os.path.join(running_dir, 'training.list'))

    # create log dictionary and return it if IN workflow mode
    if flag_full_workflow:
        return jobid2log, jobids_r0_dl1

    print(f"\n ==== END {os.path.basename(__file__)} ==== \n")

Exemple #6

0

Afficher le fichier

Fichier : onsite_mc_hiperta_r0_to_dl1lstchain.py Projet : garciagenrique/LST_scripts

def main(input_dir, config_rta_file=None, train_test_ratio=0.5, random_seed=42, n_r0_files_per_dl1_job=0,
         flag_full_workflow=False, particle=None, prod_id=None, offset=None, keep_rta_file=False, lst_config=None):
    """
    same as for r0_to_dl1 lst-like but with the exceptions of rta

    The r0 files found in `input_dir` are shuffled, split into a training and a testing set, written into
    sub-lists and one `hiperta_r0_to_dl1lstchain.py` job is submitted with `sbatch` per sub-list file found
    in the list directory.

    Parameters
    ----------
    input_dir : str
        path to the files directory to analyse
    config_rta_file : str
        Path to a HiPeRTA configuration file. If None, no `-c` option is passed to the batched script.
    train_test_ratio : float
        Ratio of training data. Default = 0.5
    random_seed : int
        Random seed used to shuffle the input file list. Default = 42
    n_r0_files_per_dl1_job : int
        Number of r0 files processed by each r0_to_dl1 batched job.

        If fewer than 100 r0 files are found in `input_dir`, 10 files per job are used, whatever the value
        of this argument.

        Otherwise, if set to 0 (Default), the value is chosen from the particle name found in `input_dir`:
        50 for gamma-diffuse, 25 for gamma, 50 for electron, 125 for proton and 50 for anything else.
    particle : str
        particle type for `flag_full_workflow` = True (used as key for the slurm job name).
    offset : str
        gamma offset. Only used when `flag_full_workflow` is True and `particle` contains 'off'.
    prod_id : str
        Production ID. If None, _v00 will be used, indicating an official base production. Default = None.
        When `flag_full_workflow` is True, the full production ID must be passed and is used unchanged.
    flag_full_workflow : bool
        Boolean flag to indicate if this script is run as part of the workflow that converts r0 to dl2 files.
    keep_rta_file : bool
        Argument to be passed to the hiperta_r0_to_dl1lstchain script, which runs the hiperta_r0_dl1 and
        re-organiser stage
    lst_config : str
        path used just to copy the config to `running analysis`

    Returns
    -------
    jobid2log : dict (only if flag_full_workflow is True)
        A dictionary of dictionaries, keyed by the jobid of every job that was batched:

             dict[jobid].keys() = ['particle', 'set_type', 'jobe_path', 'jobo_path', 'sbatch_command']

    jobids_RTA_r0_dl1_reorganized : list (only if flag_full_workflow is True)
        A list of all the jobids sent for the particle (including test and train set types).

    If flag_full_workflow is False (default), None is returned.
    """

    if not flag_full_workflow:
        # This formatting should be the same as in `onsite_mc_r0_to_dl3_hiperta.py`
        print(f"\n ==== START {os.path.basename(__file__)} ==== \n")
        today = calendar.datetime.date.today()
        base_prod_id = f'{today.year:04d}{today.month:02d}{today.day:02d}_vRTA'
        suffix_id = '_v00' if prod_id is None else f'_{prod_id}'
        full_prod_id = base_prod_id + suffix_id
    else:
        # Full prod_id is passed as argument
        full_prod_id = prod_id

    train_ratio = float(train_test_ratio)
    dl0_data_dir = input_dir

    ##############################################################################

    print(f"Working on MCHDF5 R0 files in {dl0_data_dir}")

    check_data_path(dl0_data_dir)

    raw_files_list = get_input_filelist(dl0_data_dir)

    if len(raw_files_list) < 100:
        n_r0_per_dl1_job = 10
    elif n_r0_files_per_dl1_job == 0:
        # 'gamma-diffuse' has to be tested BEFORE 'gamma': the latter is a substring of the former,
        # so the reverse order makes the gamma-diffuse case unreachable.
        if 'gamma-diffuse' in input_dir:
            n_r0_per_dl1_job = 50
        elif 'gamma' in input_dir:
            n_r0_per_dl1_job = 25
        elif 'electron' in input_dir:
            n_r0_per_dl1_job = 50
        elif 'proton' in input_dir:
            n_r0_per_dl1_job = 125
        else:
            n_r0_per_dl1_job = 50
    else:
        n_r0_per_dl1_job = n_r0_files_per_dl1_job

    random.seed(random_seed)
    random.shuffle(raw_files_list)

    number_files = len(raw_files_list)
    ntrain = int(number_files * train_ratio)
    ntest = number_files - ntrain

    training_list = raw_files_list[:ntrain]
    testing_list = raw_files_list[ntrain:]

    print(f"\t{number_files} raw files")
    print(f"\t{ntrain} files in training dataset")
    print(f"\t{ntest} files in test dataset")

    # The full lists are first written in the current directory and moved to the running dir at the end.
    with open('training.list', 'w') as newfile:
        newfile.writelines(f'{name}\n' for name in training_list)

    with open('testing.list', 'w') as newfile:
        newfile.writelines(f'{name}\n' for name in testing_list)

    if flag_full_workflow and 'off' in particle:
        # join(BASE_PATH, 'DL0', OBS_DATE, '{particle}', ZENITH, POINTING, 'PLACE_4_PROD_ID', GAMMA_OFF)
        dl0_data_dir = dl0_data_dir.split(offset)[0]  # Take out /off0.Xdeg
        running_dir = os.path.join(dl0_data_dir.replace('R0', 'running_analysis'), full_prod_id, offset)
    else:
        running_dir = os.path.join(dl0_data_dir.replace('R0', 'running_analysis'), full_prod_id)

    job_logs = os.path.join(running_dir, 'job_logs')
    dl1_data_dir = os.path.join(running_dir, 'DL1')

    print("\tRUNNING_DIR: \t", running_dir)
    print("\tJOB_LOGS DIR: \t", job_logs)
    print("\tDL1 DATA DIR: \t", dl1_data_dir)

    # In workflow mode the directories are created without the interactive verification.
    make_dir = check_and_make_dir_without_verification if flag_full_workflow else check_and_make_dir

    for directory in [running_dir, dl1_data_dir, job_logs]:
        make_dir(directory)

    # dumping the training and testing lists and splitting them in sub-lists for parallel jobs

    jobid2log = {}
    jobids_RTA_r0_dl1_reorganized = []

    # Loop invariants of the job submission
    job_name = {'electron': 'e_RTA-r0dl1',
                'gamma': 'g_RTA-r0dl1',
                'gamma-diffuse': 'gd_RTA-r0dl1',
                'proton': 'p_RTA-r0dl1',
                'gamma_off0.0deg': 'g0.0_RTA-r0dl1',
                'gamma_off0.4deg': 'g0.4_RTA-r0dl1'
                }
    queue = 'long'  # TODO change to short (all particles but protons) after prod5 check
    cc = f' -c {config_rta_file}' if config_rta_file is not None else ' '

    for set_type, files_of_set, log_tag in (('training', training_list, 'train'),
                                            ('testing', testing_list, 'test')):
        dir_lists = os.path.join(running_dir, 'file_lists_' + set_type)
        output_dir = os.path.join(running_dir, 'DL1', set_type)

        make_dir(dir_lists)
        make_dir(output_dir)

        print("\toutput dir: \t", output_dir)

        sublists = [files_of_set[start:start + n_r0_per_dl1_job]
                    for start in range(0, len(files_of_set), n_r0_per_dl1_job)]
        number_of_sublists = len(sublists)
        for i, sublist in enumerate(sublists):
            output_file = os.path.join(dir_lists, f'{set_type}_{i}.list')
            with open(output_file, 'w') as out:
                out.writelines(f'{line}\n' for line in sublist)
        print(f'\t{number_of_sublists} files generated for {set_type} list')

        # HiPeRTA ###
        save_job_ids = []

        # TODO for the moment is only user enrique.garcia who has installed HiPeRTA  ##
        base_cmd = f'core_list_hiperta.sh "/home/enrique.garcia/software/LST_scripts/lst_scripts/' \
                   f'hiperta_r0_to_dl1lstchain.py -o {output_dir} -k {keep_rta_file} {cc}"'

        # One job is batched per file present in the list directory
        list_files = os.listdir(dir_lists)
        for counter, list_file in enumerate(list_files):
            jobo = os.path.join(job_logs, f"job{counter}_{log_tag}.o")
            jobe = os.path.join(job_logs, f"job{counter}_{log_tag}.e")
            list_path = os.path.join(dir_lists, list_file)

            # recover or not the jobid depending of the workflow mode
            if not flag_full_workflow:
                cmd = f'sbatch -p short -e {jobe} -o {jobo} {base_cmd} {list_path}'
                os.system(cmd)
                continue

            # flag_full_workflow == True !
            cmd = f'sbatch --parsable -p {queue} -J {job_name[particle]} ' \
                  f'-e {jobe} -o {jobo} {base_cmd} {list_path}'

            # `--parsable` makes sbatch print the jobid on stdout
            jobid = os.popen(cmd).read().strip('\n')
            jobids_RTA_r0_dl1_reorganized.append(jobid)
            save_job_ids.append(jobid)

            # Fill the dictionaries if IN workflow mode
            jobid2log[jobid] = {
                'particle': particle,
                'set_type': set_type,
                'jobe_path': jobe,
                'jobo_path': jobo,
                'sbatch_command': cmd,
            }

        if flag_full_workflow:
            n_jobs = len(list_files)
            if save_job_ids:
                print(f"\n\t{n_jobs} jobs submitted - {particle} {set_type}. "
                      f"From jobid {save_job_ids[0]} - {save_job_ids[-1]}\n")
            else:
                # An empty set (e.g. extreme train_test_ratio) must not crash the workflow
                print(f"\n\t{n_jobs} jobs submitted - {particle} {set_type}.\n")
            time.sleep(1)  # Avoid collapsing LP cluster

    # copy this script itself into logs
    shutil.copyfile(__file__, os.path.join(running_dir, os.path.basename(__file__)))
    # copy config file into logs
    if config_rta_file is not None:
        shutil.copy(config_rta_file, os.path.join(running_dir, os.path.basename(config_rta_file)))
    if lst_config is not None:
        shutil.copy(lst_config, os.path.join(running_dir, os.path.basename(lst_config)))

    # save file lists into logs
    shutil.move('testing.list', os.path.join(running_dir, 'testing.list'))
    shutil.move('training.list', os.path.join(running_dir, 'training.list'))

    # create log dictionary and return it if IN workflow mode
    if flag_full_workflow:
        return jobid2log, jobids_RTA_r0_dl1_reorganized

    print(f"\n ==== END {os.path.basename(__file__)} ==== \n")