def write_dataset_files(dataset_files_info_filename, files_filename):
  files = []
  # datasets_files_info[dataset][filename] = {'number_events':int, 'check_sum':int, 'modification_date':int, 'file_size':int}
  dataset_files_info = nested_dict.load_json_file(dataset_files_info_filename)
  for dataset in dataset_files_info:
    for filename in dataset_files_info[dataset]:
      files.append(datasets.filename_to_parsed(filename))
  write_list(files, files_filename)
  print('Wrote to ' + files_filename)
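
# For illustration, a hypothetical instance of the JSON layout that
# write_dataset_files() consumes; the dataset path, file names, and numbers
# below are made up, and only the structure follows the comment above.
example_dataset_files_info = {
  '/TTJets_TuneCP5_13TeV/RunIISummer16NanoAODv5/NANOAODSIM': {
    '/store/mc/file_1.root': {'number_events': 52000, 'check_sum': 123456,
                              'modification_date': 1546300800, 'file_size': 1073741824},
    '/store/mc/file_2.root': {'number_events': 48000, 'check_sum': 654321,
                              'modification_date': 1546387200, 'file_size': 998244352},
  }
}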
def check_entries(job_argument_string):
  print(job_argument_string)
  args = get_args(['dataset_files_info_filename', 'command'], job_argument_string)
  args_command = get_args_command(args['command'])
  print(args)
  print(args_command)
  file_path = get_file_path(args_command['output_path'], "", args_command['input_path'])
  #print(file_path)
  if not os.path.isfile(file_path):
    # Wait for the file to appear on the raid
    file_exists = False
    for iWait in range(10):
      time.sleep(10)
      if os.path.isfile(file_path):
        file_exists = True
        break
    if not file_exists:
      return '[For queue_system] fail: no file named ' + file_path
  root_file = ROOT.TFile.Open(file_path)
  if not root_file:
    return '[For queue_system] fail: Failed in opening ' + file_path
  root_tree = root_file.Get('Events')
  root_number_entries = root_tree.GetEntries()
  #print(args['dataset_files_info_filename'][1:-1])
  # datasets_files_info[dataset][filename] = {'number_events':number_events}
  dataset_files_info = nested_dict.load_json_file(args['dataset_files_info_filename'], False)
  path_to_keys_dataset_files_info = datasets.get_path_to_keys_dataset_files_info(dataset_files_info)
  keys = path_to_keys_dataset_files_info[args_command['input_path']]
  #print(keys)
  #print(nested_dict.get_item_nested_dict(dataset_files_info, keys))
  dataset_number_entries = nested_dict.get_item_nested_dict(dataset_files_info, keys)['number_events']
  #print(root_number_entries)
  #print(dataset_number_entries)
  if root_number_entries == dataset_number_entries:
    return '[For queue_system] success'
  else:
    return ('[For queue_system] fail: root_number_entries: ' + str(root_number_entries)
            + ' and dataset_number_entries: ' + str(dataset_number_entries)
            + ' do not match')
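
# A minimal standalone sketch of the entry-count comparison done in
# check_entries(), assuming PyROOT is available; count_entries() is a
# hypothetical helper for illustration, not part of this script.
def count_entries(file_path, tree_name='Events'):
  root_file = ROOT.TFile.Open(file_path)
  if not root_file or root_file.IsZombie():
    return None  # unreadable file; the caller should report a fail
  tree = root_file.Get(tree_name)
  return tree.GetEntries() if tree else None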
def update_datasets_files_json(path_datasets_filename, in_dataset_files_info_filename, out_dataset_files_info_filename):
  list_dataset = dataset_files.get_list_dataset(path_datasets_filename)
  # dataset_files_info[dataset][filename] = {'number_events':number_events}
  in_dataset_files_info = nested_dict.load_json_file(in_dataset_files_info_filename)
  in_list_dataset = in_dataset_files_info.keys()
  append_list_dataset = list(set(list_dataset) - set(in_list_dataset))
  remove_list_dataset = list(set(in_list_dataset) - set(list_dataset))
  # Get files for each dataset
  # dataset_file_commands = [[dataset, commands]]
  append_dataset_file_commands = dataset_files.make_dataset_file_commands(append_list_dataset)
  append_dataset_file_results = dataset_files.run_list_command(append_dataset_file_commands)
  # datasets_files_info[dataset][filename] = {'number_events':number_events}
  append_dataset_files_info = dataset_files.parse_dataset_file_results(append_dataset_file_results)
  # Get meta for each file
  append_dataset_meta_commands = dataset_files.make_dataset_meta_commands(append_dataset_files_info)
  append_dataset_meta_results = dataset_files.run_list_command(append_dataset_meta_commands)
  dataset_files.parse_dataset_meta_results(append_dataset_meta_results, append_dataset_files_info)
  remove_dataset_files_info(in_dataset_files_info, remove_list_dataset)
  out_dataset_files_info = combine_dataset_files_info(in_dataset_files_info, append_dataset_files_info)
  print('appended list_dataset: ', str(append_list_dataset))
  print('removed list_dataset: ', str(remove_list_dataset))
  nested_dict.save_json_file(out_dataset_files_info, out_dataset_files_info_filename)
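
# Hedged sketches of the two helpers called above; the real versions are
# defined elsewhere in this script, and these example_* stand-ins only show
# the assumed behavior on plain nested dicts.
def example_remove_dataset_files_info(dataset_files_info, remove_list_dataset):
  # Drop datasets that disappeared from the datasets list.
  for dataset in remove_list_dataset:
    dataset_files_info.pop(dataset, None)

def example_combine_dataset_files_info(base_info, append_info):
  # Merge newly queried datasets into the existing info; appended keys win.
  out_info = dict(base_info)
  out_info.update(append_info)
  return out_info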
# [mc_dataset_common_names_filename, ['2016', '2017', '2018']],
# [mc_dataset_2016_names_filename, ['2016']],
# [mc_dataset_2017_names_filename, ['2017']],
# [mc_dataset_2018_names_filename, ['2018']],
# ])
#print('dataset_names:', mc_dataset_names)

# Ex) tag_meta[2016] = RunIISummer16, MiniAODv3, NanoAODv5
mc_tag_meta = datasets.parse_mc_tag_meta(mc_tag_meta_filename)
if make_data_datasets:
  # Ex) data_tag_meta[2016][B][MET][miniaod] = 17Jul2018
  data_tag_meta = datasets.parse_data_tag_meta(data_tag_meta_filename)

if make_mc_datasets:
  # mc_datasets[mc_dataset_name][year][data_tier][path] = {"parent_chain":[], "children":[], "creation time":string, "size":int, "files":int, "events":int}
  mc_datasets = nested_dict.load_json_file(mc_datasets_filename)
  datasets.check_false_none_mc_datasets(mc_datasets)
  # Make meta data
  path_to_keys_mc_datasets = datasets.get_path_to_keys_mc_datasets(mc_datasets)
  search_string_to_keys_mc_datasets = datasets.get_search_string_to_keys_mc_datasets(mc_tag_meta, mc_datasets)
  same_parent_paths = datasets.get_same_parent_paths(mc_datasets)
  multiple_mc_datasets = datasets.get_multiple_mc_datasets(mc_datasets)
  mini_to_nanos_from_nanoaod = datasets.get_mini_to_nanos_from_nanoaod_mc_datasets(mc_datasets)
  nano_to_mini_from_miniaod = datasets.get_nano_to_mini_from_miniaod_mc_datasets(mc_datasets)

if make_data_datasets:
  data_datasets = nested_dict.load_json_file(data_datasets_filename)
  datasets.check_false_none_data_datasets(data_datasets)
  datasets.print_multiple_data_datasets(data_datasets)
  nested_dict.save_json_file(data_datasets, selected_data_datasets_filename)
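
# A guess at the index that get_path_to_keys_mc_datasets() builds above: for
# each dataset path, the key chain needed to reach it in mc_datasets. Sketch
# only; the real implementation lives in the datasets module.
def example_path_to_keys(mc_datasets):
  path_to_keys = {}
  for name in mc_datasets:
    for year in mc_datasets[name]:
      for data_tier in mc_datasets[name][year]:
        for path in mc_datasets[name][year][data_tier]:
          path_to_keys[path] = [name, year, data_tier, path]
  return path_to_keys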
initialize_arguments(args)
valid, log = are_arguments_valid(args)
if not valid:
  print('[Error] ' + log)
  sys.exit()

queue = ucsb_queue.ucsb_queue()

#jobs_info_filename = 'jsons/submitted_test_mc_jobs_info.json'
#output_json = 'jsons/checked_test_mc_jobs_info.json'
#jobscript_check_filename = './copy_aods_check_entries.py'
#statuses = ['submitted']
jobs_info_filename = args['jobs_info_filename']
output_json = args['output_json']
jobscript_check_filename = args['jobscript_check_filename']
statuses = args['statuses']

# Check the jobs.
# jobs_info = [{'command_script':command_script, 'other_global_key':other_global_key, 'ignore_keys':['job_id', 'job_status', ...]},
#              {'key_for_job':key_for_job}, {'key_for_job':key_for_job}, ...]
jobs_info = nested_dict.load_json_file(jobs_info_filename)
# Each job type should provide a job_script and a job_check_script.
# Running './job_check_script job_log_string' should return 'success' or 'fail' for that job.
# statuses: [status], where status = 'submitted', 'done', 'fail', 'success', or 'to_submit'
queue.check_jobs(jobs_info, statuses, jobscript_check_filename, args['debug'])
#queue.check_jobs(jobs_info, ['submitted', 'done', 'fail', 'success', 'to_submit'], jobscript_check_filename)
queue.print_jobs_status(jobs_info)
nested_dict.save_json_file(jobs_info, output_json)
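
# A hypothetical jobs_info list matching the comment above: the first entry
# holds the global settings, and each following entry describes one job. All
# values here are made up for illustration.
example_jobs_info = [
  {'command_script': './copy_aods.py',
   'ignore_keys': ['job_id', 'job_status']},
  {'job_id': '1001', 'job_status': 'submitted', 'input_path': '/net/top/file_1.root'},
  {'job_id': '1002', 'job_status': 'done', 'input_path': '/net/top/file_2.root'},
]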
    [mc_dataset_common_names_filename, ['2016', '2017', '2018']],
    [mc_dataset_2016_names_filename, ['2016']],
    [mc_dataset_2017_names_filename, ['2017']],
    [mc_dataset_2018_names_filename, ['2018']],
    ])
#print('dataset_names:', mc_dataset_names)

# Ex) tag_meta[2016] = RunIISummer16, MiniAODv3, NanoAODv5
mc_tag_meta = datasets.parse_mc_tag_meta(mc_tag_meta_filename)
if make_data_datasets:
  # Ex) data_tag_meta[2016][B][MET][miniaod] = 17Jul2018
  data_tag_meta = datasets.parse_data_tag_meta(data_tag_meta_filename)

if make_mc_datasets:
  # mc_datasets[mc_dataset_name][year][data_tier][path] = {"parent_chain":[], "children":[], "creation time":string, "size":int, "files":int, "events":int}
  mc_datasets = nested_dict.load_json_file(mc_datasets_filename)
  datasets.check_overlapping_paths_mc_datasets(mc_datasets)
  #print(nested_dict.get_from_nested_dict(mc_datasets, '/DYJetsToLL_M-50_HT-70to100_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/MINIAODSIM'))
  #print(nested_dict.get_nested_dict(mc_datasets, ['ZJetsToNuNu_HT-600To800', '2017', 'miniaod']))
  # keys_mc_datasets = [ [mc_dataset_name, year, data_tier, search_string] ]
  keys_mc_datasets = datasets.get_keys_mc_datasets(mc_dataset_names, mc_tag_meta, data_tiers)
  #datasets.print_missing_mc_datasets(keys_mc_datasets, mc_datasets)
  ##nested_dict.fill_empty_nested_dict(mc_datasets, ['TTJets_SingleLeptFromT_Tune', '2016', 'miniaod'])
  ##datasets.check_false_none_mc_datasets(mc_datasets)
  #datasets.print_same_parent_mc_datasets(mc_datasets)
  #datasets.check_mini_nano_consistentcy_mc_datasets(mc_tag_meta, mc_datasets)
  filtered_mc_datasets = mc_datasets
  filtered_mc_datasets = filter_mc_datasets(filtered_mc_datasets, reject_string_ignore_case_mc_datasets, '_mtop1')
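
# Hedged sketch of the filtering step above: filter_mc_datasets() is assumed
# to apply the given filter function with one extra argument, and
# reject_string_ignore_case_mc_datasets is assumed to drop any mc_dataset_name
# containing the substring, case-insensitively. These example_* versions are
# illustrations, not the script's definitions.
def example_reject_string_ignore_case(mc_datasets, reject_string):
  return {name: info for name, info in mc_datasets.items()
          if reject_string.lower() not in name.lower()}

def example_filter_mc_datasets(mc_datasets, filter_function, filter_argument):
  return filter_function(mc_datasets, filter_argument)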
#search_term = "" #search_term = "files<10" search_term = args['sql_search'] database = sqlite3.connect(':memory:') cursor = database.cursor() files_to_download = [] files_to_remove = [] if do_mc or do_signal: dataset_files_info_filename = mc_dataset_files_info_filename # Make database # Load files # datasets_files_info[dataset][filename] = {'number_events':int, 'check_sum':int, 'modification_date':int, 'file_size':int} mc_dataset_files_info = nested_dict.load_json_file( mc_dataset_files_info_filename) # mc_dataset_names[year] = [(mc_dataset_name, mc_dir)] mc_dataset_names = datasets.parse_multiple_mc_dataset_names([ [mc_dataset_common_names_filename, ['2016', '2017', '2018']], [mc_dataset_2016_names_filename, ['2016']], [mc_dataset_2017_names_filename, ['2017']], [mc_dataset_2018_names_filename, ['2018']], ]) #print ('dataset_names:', mc_dataset_names) # Ex) tag_meta[2016] = RunIISummer16, MiniAODv3, NanoAODv5 mc_tag_meta = datasets.parse_mc_tag_meta(mc_tag_meta_filename) # keys_mc_datasets = [ [mc_dataset_name, year, data_tier, search_string] ] keys_mc_datasets = datasets.get_keys_mc_datasets( mc_dataset_names, mc_tag_meta, data_tiers)