def are_arguments_valid(args):
  # Check for data_tiers
  if not argparse_helper.is_valid(args, 'data_tiers', ['nanoaod', 'miniaod']):
    return False, 'data_tiers: ' + str(args['data_tiers']) + ' is not valid.'
  # Check for mc_data
  if not argparse_helper.is_valid(args, 'mc_data', ['mc', 'data']):
    return False, 'mc_data: ' + str(args['mc_data']) + ' is not valid.'
  # Check that the input files with in_json_prefix exist
  if 'mc' in args['mc_data']:
    t_path = os.path.join(args['in_json_folder'],
                          args['in_json_prefix'] + 'mc_dataset_files_info.json')
    if not os.path.isfile(t_path):
      return False, t_path + ' does not exist.'
  if 'data' in args['mc_data']:
    t_path = os.path.join(args['in_json_folder'],
                          args['in_json_prefix'] + 'data_dataset_files_info.json')
    if not os.path.isfile(t_path):
      return False, t_path + ' does not exist.'
  # Check that the output folder exists
  if not os.path.isdir(args['out_results_folder']):
    return False, 'out_results_folder: ' + args['out_results_folder'] + " doesn't exist."
  # Ask before overwriting existing files with out_results_prefix
  if 'mc' in args['mc_data']:
    t_path = os.path.join(args['out_results_folder'],
                          args['out_results_prefix'] + 'mc_dataset_files')
    if os.path.isfile(t_path):
      overwrite = ask.ask_key(
          t_path + ' already exists. Do you want to overwrite? (y/n) Default is n. ',
          ['y', 'n'], 'n')
      if overwrite == 'n':
        return False, t_path + ' already exists.'
  if 'data' in args['mc_data']:
    t_path = os.path.join(args['out_results_folder'],
                          args['out_results_prefix'] + 'data_dataset_files')
    if os.path.isfile(t_path):
      overwrite = ask.ask_key(
          t_path + ' already exists. Do you want to overwrite? (y/n) Default is n. ',
          ['y', 'n'], 'n')
      if overwrite == 'n':
        return False, t_path + ' already exists.'
  return True, ''
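# These fragments depend on project-local helpers (argparse_helper, ask,
# nested_dict, queue) plus the standard os module. ask.ask_key is used in
# nearly every function below; a minimal sketch, assuming it re-prompts until
# one of the allowed keys is typed and returns the default on empty input
# (assumption; the real helper may behave differently):
def ask_key(question, allowed_keys, default_key):
  while True:
    answer = input(question).strip()
    if answer == '':
      return default_key
    if answer in allowed_keys:
      return answer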
def jobs_to_submit(self, jobs_info, statuses, job_check_script=None):
  # Interactively show the log for each job whose status is in statuses and
  # mark the jobs chosen by the user for re-submission.
  default_info = jobs_info[0]
  for job_index_raw, job_info in enumerate(jobs_info[1:]):
    job_index = job_index_raw + 1
    job_id, multiple_index = self.get_job_id_multiple_index(
        job_info['job_identifier'])
    job_status = job_info['job_status']
    if job_status not in statuses:
      continue
    job_log_string = self.get_job_log_string(job_id, multiple_index)
    print('--------')
    print('job_id: ' + str(job_id) + ', multiple_index: ' + str(multiple_index))
    print('job_script: ' + job_info['submission_command'])
    print('--------Job Log--------')
    print(job_log_string.rstrip())
    print('--------Job Log--------')
    if job_info['job_identifier'] in job_info['job_trials_reason']:
      print('fail reason: ' +
            job_info['job_trials_reason'][job_info['job_identifier']])
    else:
      print('fail reason: unknown')
    print('--------')
    if job_check_script:
      print(self.get_check_command(jobs_info, job_check_script, job_index))
      print('--------')
    is_rerun = ask.ask_key('Do you want to re-run? (y/n) Default is y. ',
                           ['y', 'n'], 'y')
    if is_rerun == 'y':
      job_info['job_status'] = 'to_submit'
def are_arguments_valid(args):
  if not os.path.isfile(args['jobs_info_filename']):
    return False, 'jobs_info_filename: '+args['jobs_info_filename']+" doesn't exist."
  if os.path.isfile(args['output_json']):
    overwrite = ask.ask_key(args['output_json']+' already exists. Do you want to overwrite? (y/n) Default is n. ', ['y','n'], 'n')
    if overwrite == 'n':
      return False, 'output_json: '+args['output_json']+' already exists.'
  # TODO fix below
  #if args['jobscript_check_filename'] != None:
  #  if not os.path.isfile(args['jobscript_check_filename']):
  #    return False, 'jobscript_check_filename: '+args['jobscript_check_filename']+" doesn't exist."
  if args['jobscript_check_filename'] is not None:
    if not which(args['jobscript_check_filename']):
      return False, 'jobscript_check_filename: '+args['jobscript_check_filename']+" isn't executable."
  valid_statuses = ['submitted', 'done', 'fail', 'success', 'to_submit']
  if len(args['statuses']) == 0:
    return False, 'len(statuses) is 0'
  for status in args['statuses']:
    if status not in valid_statuses:
      return False, status + ' is not valid'
  return True, ''
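# which() is not defined in this section; a minimal sketch, assuming it
# behaves like shutil.which, returning the resolved path when the given file
# is executable and None otherwise (assumption; the project may ship its own
# helper):
import shutil

def which(filename):
  return shutil.which(filename)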
def ask_change_key_globally():
  #change_key_globally = ask_yn('Do you want to change key globally? (y/n) Default is n. ', 'n')
  change_key_globally = ask.ask_key(
      'Do you want to change key globally? (y/n) Default is n. ',
      ['y', 'n'], 'n')
  if change_key_globally == 'n':
    return 'n', None, None
  key, value = ask_key_value()
  return change_key_globally, key, value
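# ask_key_value() is not shown in this section; a minimal sketch, assuming it
# prompts on stdin for the global key to change and its new value
# (hypothetical helper; the real one may validate against existing keys):
def ask_key_value():
  key = input('Key to change: ')
  value = input('New value for ' + key + ': ')
  return key, value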
def are_arguments_valid(args):
  if not os.path.isfile(args['jobs_info_filename']):
    return False, 'jobs_info_filename: ' + args['jobs_info_filename'] + " doesn't exist."
  if os.path.isfile(args['output_json']):
    overwrite = ask.ask_key(
        args['output_json'] +
        ' already exists. Do you want to overwrite? (y/n) Default is n. ',
        ['y', 'n'], 'n')
    if overwrite == 'n':
      return False, 'output_json: ' + args['output_json'] + ' already exists.'
  return True, ''
def are_arguments_valid(args):
  if os.path.isfile(args['jobs_info_filename']):
    overwrite = ask.ask_key(
        args['jobs_info_filename'] +
        ' already exists. Do you want to overwrite? (y/n) Default is n. ',
        ['y', 'n'], 'n')
    if overwrite == 'n':
      return False, args['jobs_info_filename'] + ' already exists.'
  if not os.path.isfile(args['command_list_filename']):
    return False, args['command_list_filename'] + " does not exist."
  if not which(args['command_list_filename']):
    return False, "Can't execute " + args['command_list_filename'] + "."
  return True, ''
def jobs_to_submit(self, jobs_info, statuses, job_check_script=None):
  # Simpler variant of the interactive re-run loop above: show only the raw
  # log before asking whether to re-submit.
  default_info = jobs_info[0]
  for job_index_raw, job_info in enumerate(jobs_info[1:]):
    job_index = job_index_raw + 1
    job_id, multiple_index = self.get_job_id_multiple_index(
        job_info['job_identifier'])
    job_status = job_info['job_status']
    if job_status not in statuses:
      continue
    job_log_string = self.get_job_log_string(job_id, multiple_index)
    print('--------')
    print(job_log_string.rstrip())
    print('--------')
    if job_check_script:
      print(self.get_check_command(jobs_info, job_check_script, job_index))
      print('--------')
    is_rerun = ask.ask_key('Do you want to re-run? (y/n) Default is y. ',
                           ['y', 'n'], 'y')
    if is_rerun == 'y':
      job_info['job_status'] = 'to_submit'
def are_arguments_valid(args):
  # Check for data_tiers
  if not argparse_helper.is_valid(args, 'data_tiers', ['nanoaod', 'miniaod']):
    return False, 'data_tiers: '+str(args['data_tiers'])+' is not valid.'
  # Check for mc_data
  if not argparse_helper.is_valid(args, 'mc_data', ['mc', 'data']):
    return False, 'mc_data: '+str(args['mc_data'])+' is not valid.'
  # Check for meta files
  if not os.path.isdir(args['meta_folder']):
    return False, 'meta_folder: '+args['meta_folder']+" doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_common_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_common: '+t_path+" doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_2016_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_2016_names: '+t_path+" doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_2017_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_2017_names: '+t_path+" doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_2018_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_2018_names: '+t_path+" doesn't exist."
  if 'mc' in args['mc_data']:
    t_path = os.path.join(args['meta_folder'], 'mc_tag_meta')
    if not os.path.isfile(t_path):
      return False, 'meta_mc_tag_meta: '+t_path+" doesn't exist."
  if 'data' in args['mc_data']:
    t_path = os.path.join(args['meta_folder'], 'data_tag_meta')
    if not os.path.isfile(t_path):
      return False, 'meta_data_tag_meta: '+t_path+" doesn't exist."
  # Check that the output folder exists
  if not os.path.isdir(args['out_json_folder']):
    return False, 'out_json_folder: '+args['out_json_folder']+" doesn't exist."
  # Check that the input files with in_json_prefix exist
  if 'mc' in args['mc_data']:
    t_path = os.path.join(args['in_json_folder'], args['in_json_prefix']+'mc_datasets.json')
    if not os.path.isfile(t_path):
      return False, t_path+' does not exist.'
  if 'data' in args['mc_data']:
    t_path = os.path.join(args['in_json_folder'], args['in_json_prefix']+'data_datasets.json')
    if not os.path.isfile(t_path):
      return False, t_path+' does not exist.'
  # Ask before overwriting existing files with out_json_prefix
  if 'mc' in args['mc_data']:
    t_path = os.path.join(args['out_json_folder'], args['out_json_prefix']+'mc_multiple_selection.json')
    if os.path.isfile(t_path):
      overwrite = ask.ask_key(t_path+' already exists. Do you want to overwrite? (y/n) Default is n. ', ['y','n'], 'n')
      if overwrite == 'n':
        return False, t_path+' already exists.'
    t_path = os.path.join(args['out_json_folder'], args['out_json_prefix']+'mc_datasets.json')
    if os.path.isfile(t_path):
      overwrite = ask.ask_key(t_path+' already exists. Do you want to overwrite? (y/n) Default is n. ', ['y','n'], 'n')
      if overwrite == 'n':
        return False, t_path+' already exists.'
  if 'data' in args['mc_data']:
    t_path = os.path.join(args['out_json_folder'], args['out_json_prefix']+'data_datasets.json')
    if os.path.isfile(t_path):
      overwrite = ask.ask_key(t_path+' already exists. Do you want to overwrite? (y/n) Default is n. ', ['y','n'], 'n')
      if overwrite == 'n':
        return False, t_path+' already exists.'
  return True, ''
def are_arguments_valid(args):
  # Will not check for mc_data_sig. Can't know what nanoaod_tag will be.
  #if not argparse_helper.is_valid(args, 'mc_data', ['mc', 'data']):
  #  return False, 'mc_data: '+str(args['mc_data'])+' is not valid.'
  if len(args['mc_data_sig']) != 1:
    return False, 'mc_data_sig: ' + str(
        args['mc_data_sig']) + ' is not valid. Must choose only one.'
  # Check for meta files
  if not os.path.isdir(args['meta_folder']):
    return False, 'meta_folder: ' + args['meta_folder'] + " doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_common_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_common: ' + t_path + " doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_2016_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_2016_names: ' + t_path + " doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_2017_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_2017_names: ' + t_path + " doesn't exist."
  t_path = os.path.join(args['meta_folder'], 'mc_dataset_2018_names')
  if not os.path.isfile(t_path):
    return False, 'meta_mc_dataset_2018_names: ' + t_path + " doesn't exist."
  if 'data' not in args['mc_data_sig']:
    t_path = os.path.join(args['meta_folder'], 'mc_tag_meta')
    if not os.path.isfile(t_path):
      return False, 'meta_mc_tag_meta: ' + t_path + " doesn't exist."
  if 'data' in args['mc_data_sig']:
    t_path = os.path.join(args['meta_folder'], 'data_tag_meta')
    if not os.path.isfile(t_path):
      return False, 'meta_data_tag_meta: ' + t_path + " doesn't exist."
  # Check that the input files with the in_*_prefix exist
  if 'data' not in args['mc_data_sig']:
    t_path = os.path.join(args['in_json_folder'],
                          args['in_datasets_prefix'] + 'mc_datasets.json')
    if not os.path.isfile(t_path):
      return False, t_path + ' does not exist.'
    t_path = os.path.join(args['in_json_folder'],
                          args['in_files_prefix'] + 'mc_dataset_files_info.json')
    if not os.path.isfile(t_path):
      return False, t_path + ' does not exist.'
  if 'data' in args['mc_data_sig']:
    t_path = os.path.join(args['in_json_folder'],
                          args['in_datasets_prefix'] + 'data_datasets.json')
    if not os.path.isfile(t_path):
      return False, t_path + ' does not exist.'
    t_path = os.path.join(args['in_json_folder'],
                          args['in_files_prefix'] + 'data_dataset_files_info.json')
    if not os.path.isfile(t_path):
      return False, t_path + ' does not exist.'
  ## Check if files exist
  #if not os.path.isfile(args['in_dataset_files_info']):
  #  return False, args['in_dataset_files_info']+' does not exist.'
  if not os.path.isdir(args['target_base']):
    return False, args['target_base'] + ' does not exist.'
  # Ask before overwriting the output command-lines file
  if os.path.isfile(args['out_command_lines']):
    overwrite = ask.ask_key(
        args['out_command_lines'] +
        ' already exists. Do you want to overwrite? (y/n) Default is n. ',
        ['y', 'n'], 'n')
    if overwrite == 'n':
      return False, args['out_command_lines'] + ' already exists.'
  return True, ''
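# For orientation, a hypothetical args dict accepted by the validator above
# (keys are taken from the code; all values are made up):
example_args = {
    'mc_data_sig': ['mc'],
    'meta_folder': 'meta',
    'in_json_folder': 'jsons',
    'in_datasets_prefix': '',
    'in_files_prefix': '',
    'target_base': '/net/top/data',
    'out_command_lines': 'copy_commands.py',
}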
#print('Files to download')
#for x in files_to_download: print('  '+x)
if len(files_to_remove) != 0:
  print('Files_to_remove')
  for x in files_to_remove:
    print('  ' + x)
#for filename in target_file_info:
#  files_to_download.append(filename)

# Make script for downloading
target_folder = os.path.join(args['target_base'], mid_folder)
if not os.path.exists(target_folder):
  make_dir = ask.ask_key(
      'Should directory: ' + target_folder + ' be made? (y/n) Default is y. ',
      ['y', 'n'], 'y')
  if make_dir == 'y':
    os.makedirs(target_folder)
command_list_filename = args['out_command_lines']
command_list_string = ''
command_list_string += '#!/bin/env python\n'
#command_list_string += "base_command = './run_scripts/copy_aods.py'\n"
command_list_string += "base_command = '" + os.environ['JB_DATASETS_DIR'] + "/bin/copy_aods.py'\n"
command_list_string += "base_folder = '" + args['target_base'] + "/'\n"
command_list_string += "mid_folder = '" + mid_folder + "'\n"
command_list_string += "print('# [global_key] dataset_files_info_filename : " + dataset_files_info_filename + "')\n"
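# With hypothetical values, the generated command-list script would begin:
#
#   #!/bin/env python
#   base_command = '/path/to/jb_datasets/bin/copy_aods.py'
#   base_folder = '/net/top/data/'
#   mid_folder = 'nanoaod/2016'
#   print('# [global_key] dataset_files_info_filename : mc_dataset_files_info.json')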
statuses = args['statuses']
# jobs_info = ({'command_script':command_script, 'other_global_key':other_global_key,
#               'ignore_keys':('job_id', 'job_status', ...)},
#              {'key_for_job':key_for_job}, {'key_for_job':key_for_job}, ...)
jobs_info = nested_dict.load_json_file(jobs_info_filename)
# key_value = (key, value)
print('[Global keys]\n ' +
      '\n '.join([x + ': ' + jobs_info[0][x] for x in jobs_info[0]]))
is_change_key_globally, key, value = ask_change_key_globally()
if is_change_key_globally == 'y':
  change_key_globally(key, value, jobs_info)
number_failed_jobs = queue.get_number_jobs(jobs_info, ['fail'])
print('Number of failed jobs: ' + str(number_failed_jobs))
is_all_rerun = ask.ask_key(
    'Do you want to re-run all failed jobs? (y/n) Default is n. ',
    ['y', 'n'], 'n')
if is_all_rerun == 'y':
  queue.all_jobs_to_submit(jobs_info, statuses)
# Ask if job should be rerun
if is_all_rerun == 'n':
  queue.jobs_to_submit(jobs_info, statuses, jobscript_check_filename)
#queue.print_jobs_status(jobs_info)
#filtered_jobs_info = queue.filtered_jobs_info(jobs_info, ['to_submit'])
filtered_jobs_info = jobs_info
queue.print_jobs_status(filtered_jobs_info)
nested_dict.save_json_file(filtered_jobs_info, output_json)
jobscript_check_filename = args['jobscript_check_filename']
statuses = args['statuses']
# jobs_info = ({'command_script':command_script, 'other_global_key':other_global_key,
#               'ignore_keys':('job_id', 'job_status', ...)},
#              {'key_for_job':key_for_job}, {'key_for_job':key_for_job}, ...)
jobs_info = nested_dict.load_json_file(jobs_info_filename)
# key_value = (key, value)
print('[Global keys]\n ' +
      '\n '.join([x + ': ' + jobs_info[0][x] for x in jobs_info[0]]))
is_change_key_globally, key, value = ask_change_key_globally()
if is_change_key_globally == 'y':
  change_key_globally(key, value, jobs_info)
number_failed_jobs = queue.get_number_jobs(jobs_info, statuses)
print('Number of jobs that can be rerun: ' + str(number_failed_jobs))
is_all_rerun = ask.ask_key('Check logs to re-run? (y/n) Default is y. ',
                           ['y', 'n'], 'y')
if is_all_rerun == 'n':
  queue.all_jobs_to_submit(jobs_info, statuses)
# Ask if job should be rerun
if is_all_rerun == 'y':
  queue.jobs_to_submit(jobs_info, statuses, jobscript_check_filename)
#queue.print_jobs_status(jobs_info)
#filtered_jobs_info = queue.filtered_jobs_info(jobs_info, ['to_submit'])
filtered_jobs_info = jobs_info
queue.print_jobs_status(filtered_jobs_info)
nested_dict.save_json_file(filtered_jobs_info, output_json)
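# A hedged illustration of the jobs_info layout documented in the comments
# above (all values below are hypothetical; the real JSON is produced by the
# submission scripts):
example_jobs_info = [
    # jobs_info[0]: global keys shared by all jobs
    {'command_script': 'run_commands.py',
     'ignore_keys': ['job_id', 'job_status']},
    # jobs_info[1:]: one dict per job
    {'job_identifier': '1234_0',
     'job_status': 'fail',
     'submission_command': './run_commands.py --index 0'},
]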