def get_no_nano_in_mini_mc_datasets(mc_tag_meta, mc_datasets):
  no_nano_in_mini_mc_datasets = {}
  # Map each miniaod parent back to the nanoaod path that was produced from it.
  mini_to_nano_from_nano = {}
  for mc_dataset_name in mc_datasets:
    for year in mc_datasets[mc_dataset_name]:
      if 'nanoaod' not in mc_datasets[mc_dataset_name][year]:
        continue
      for path in mc_datasets[mc_dataset_name][year]['nanoaod']:
        parent = mc_datasets[mc_dataset_name][year]['nanoaod'][path][
            'parent_chain'][0]
        mini_to_nano_from_nano[parent] = path
  # Check if nano is in mini
  for mc_dataset_name in mc_datasets:
    for year in mc_datasets[mc_dataset_name]:
      if 'miniaod' not in mc_datasets[mc_dataset_name][year]:
        continue
      for path in mc_datasets[mc_dataset_name][year]['miniaod']:
        children = mc_datasets[mc_dataset_name][year]['miniaod'][path][
            'children']
        if path not in mini_to_nano_from_nano:
          # No nanoaod entry points back to this miniaod; also check whether
          # any child carries the nanoaod tag before flagging it.
          found_nano = False
          for nano in children:
            if mc_tag_meta[year][2] in nano:
              found_nano = True
              break
          if not found_nano:
            nested_dict.fill_nested_dict(no_nano_in_mini_mc_datasets,
                [mc_dataset_name, year, 'miniaod', path],
                mc_datasets[mc_dataset_name][year]['miniaod'][path]['children'])
  return no_nano_in_mini_mc_datasets
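# Illustrative sketch of the inputs assumed above (hypothetical, not from the
# original module): mc_datasets maps name -> year -> data_tier -> path -> info,
# where miniaod entries carry a 'children' list and nanoaod entries a
# 'parent_chain' list, and mc_tag_meta[year][2] is taken to be the nanoaod
# production tag substring. The dataset names and tags below are made up.
def _example_no_nano_in_mini_inputs():
  example_mc_tag_meta = {'2017': ['MiniTag', 'GTag', 'Nano14Dec2018']}
  example_mc_datasets = {
      'TTJets': {
          '2017': {
              'miniaod': {
                  '/TTJets/MiniTag/MINIAODSIM': {
                      'children': ['/TTJets/Nano14Dec2018/NANOAODSIM']},
              },
              'nanoaod': {
                  '/TTJets/Nano14Dec2018/NANOAODSIM': {
                      'parent_chain': ['/TTJets/MiniTag/MINIAODSIM']},
              },
          },
      },
  }
  return get_no_nano_in_mini_mc_datasets(example_mc_tag_meta,
                                          example_mc_datasets)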
def get_rejected_mc_datasets(mc_datasets, reject_mc_function, reject_input):
  rejected_mc_datasets = {}
  for mc_dataset_name in mc_datasets:
    for year in mc_datasets[mc_dataset_name]:
      for data_tier in mc_datasets[mc_dataset_name][year]:
        for path in mc_datasets[mc_dataset_name][year][data_tier]:
          if reject_mc_function(reject_input, mc_datasets, mc_dataset_name,
                                year, data_tier, path):
            path_info = mc_datasets[mc_dataset_name][year][data_tier][path]
            nested_dict.fill_nested_dict(rejected_mc_datasets,
                [mc_dataset_name, year, data_tier, path], path_info)
  return rejected_mc_datasets
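# A minimal sketch of a reject callback compatible with
# get_rejected_mc_datasets (an illustration, not part of the original module).
# It assumes reject_input is an iterable of substrings to veto in the dataset
# path; the function name and that convention are hypothetical.
def example_reject_mc_path_by_substring(reject_input, mc_datasets,
                                        mc_dataset_name, year, data_tier, path):
  # Reject the path if it contains any of the veto substrings.
  return any(veto in path for veto in reject_input)
# Hypothetical call:
#   rejected = get_rejected_mc_datasets(mc_datasets,
#       example_reject_mc_path_by_substring, ['Validation', 'LowPU'])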
def get_rejected_data_datasets(data_datasets, reject_data_function, reject_input):
  rejected_data_datasets = {}
  for stream in data_datasets:
    for year in data_datasets[stream]:
      for run_group in data_datasets[stream][year]:
        for data_tier in data_datasets[stream][year][run_group]:
          for path in data_datasets[stream][year][run_group][data_tier]:
            if reject_data_function(reject_input, data_datasets, stream, year,
                                    run_group, data_tier, path):
              path_info = data_datasets[stream][year][run_group][data_tier][path]
              nested_dict.fill_nested_dict(rejected_data_datasets,
                  [stream, year, run_group, data_tier, path], path_info)
  return rejected_data_datasets
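# Analogous sketch for the data case (hypothetical, for illustration only):
# the callback additionally receives the stream and run_group keys.
def example_reject_data_path_by_substring(reject_input, data_datasets, stream,
                                          year, run_group, data_tier, path):
  # Reject the path if it contains any of the veto substrings in reject_input.
  return any(veto in path for veto in reject_input)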
def get_unrejected_if_possible_mc_datasets(mc_datasets, reject_mc_function,
                                           reject_input=None):
  unrejected_mc_datasets = {}
  for mc_dataset_name in mc_datasets:
    for year in mc_datasets[mc_dataset_name]:
      for data_tier in mc_datasets[mc_dataset_name][year]:
        found_mc_dataset = False
        list_rejected_mc_dataset = []
        for path in mc_datasets[mc_dataset_name][year][data_tier]:
          path_info = mc_datasets[mc_dataset_name][year][data_tier][path]
          if reject_mc_function(reject_input, mc_datasets, mc_dataset_name,
                                year, data_tier, path):
            list_rejected_mc_dataset.append([path, path_info])
            continue
          found_mc_dataset = True
        # If every path was rejected, keep (un-reject) the rejected paths so
        # that the (name, year, data_tier) entry is not emptied out.
        if not found_mc_dataset:
          for rejected_mc_dataset in list_rejected_mc_dataset:
            nested_dict.fill_nested_dict(unrejected_mc_datasets,
                [mc_dataset_name, year, data_tier, rejected_mc_dataset[0]],
                rejected_mc_dataset[1])
  return unrejected_mc_datasets
def get_rejected_if_possible_mc_datasets(mc_datasets, reject_mc_function,
                                         reject_input, verbose=False):
  rejected_mc_datasets = {}
  for mc_dataset_name in mc_datasets:
    for year in mc_datasets[mc_dataset_name]:
      for data_tier in mc_datasets[mc_dataset_name][year]:
        found_mc_dataset = False
        list_rejected_mc_dataset = []
        for path in mc_datasets[mc_dataset_name][year][data_tier]:
          path_info = mc_datasets[mc_dataset_name][year][data_tier][path]
          if reject_mc_function(reject_input, mc_datasets, mc_dataset_name,
                                year, data_tier, path):
            if verbose:
              print('Possibly rejected '+path)
            list_rejected_mc_dataset.append([path, path_info])
            continue
          if verbose:
            print('Found '+path)
          found_mc_dataset = True
        # Reject the flagged paths only if at least one other path survived.
        if found_mc_dataset:
          for rejected_mc_dataset in list_rejected_mc_dataset:
            if verbose:
              print('Rejected '+rejected_mc_dataset[0])
            nested_dict.fill_nested_dict(rejected_mc_datasets,
                [mc_dataset_name, year, data_tier, rejected_mc_dataset[0]],
                rejected_mc_dataset[1])
  return rejected_mc_datasets
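# Hedged usage sketch contrasting the two "if possible" helpers above
# (hypothetical, not from the original module): for a given
# (mc_dataset_name, year, data_tier), get_rejected_if_possible_mc_datasets
# returns flagged paths only when some other path survives, while
# get_unrejected_if_possible_mc_datasets returns the flagged paths that must
# be kept because every path was flagged. The veto substring is made up.
def _example_if_possible_usage(mc_datasets):
  reject_input = ['ext1']
  rejected = get_rejected_if_possible_mc_datasets(
      mc_datasets, example_reject_mc_path_by_substring, reject_input,
      verbose=True)
  kept_anyway = get_unrejected_if_possible_mc_datasets(
      mc_datasets, example_reject_mc_path_by_substring, reject_input)
  return rejected, kept_anyway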
def select_paths_from_multiple(mc_tag_meta, mc_datasets, multiple_selection):
  mc_datasets_selected = {}
  mc_datasets_non_selected = {}
  for mc_dataset_name in mc_datasets:
    for year in mc_datasets[mc_dataset_name]:
      for data_tier in mc_datasets[mc_dataset_name][year]:
        search_string = datasets.get_mc_dataset_search_string(
            mc_tag_meta, mc_dataset_name, year, data_tier)
        multiple_selected_paths = None
        if search_string in multiple_selection:
          multiple_selected_paths = multiple_selection[search_string]['selected_paths']
        if len(mc_datasets[mc_dataset_name][year][data_tier]) == 1:
          # A single path needs no selection.
          path = next(iter(mc_datasets[mc_dataset_name][year][data_tier]))
          nested_dict.fill_nested_dict(mc_datasets_selected,
              [mc_dataset_name, year, data_tier, path],
              mc_datasets[mc_dataset_name][year][data_tier][path])
        else:
          for path in mc_datasets[mc_dataset_name][year][data_tier]:
            if multiple_selected_paths is not None:
              # Keep only the paths listed in the multiple selection.
              if path in multiple_selected_paths:
                nested_dict.fill_nested_dict(mc_datasets_selected,
                    [mc_dataset_name, year, data_tier, path],
                    mc_datasets[mc_dataset_name][year][data_tier][path])
            else:
              print('[Warning] '+path+' is not in multiple_selection. Will not select any dataset.')
              nested_dict.fill_nested_dict(mc_datasets_non_selected,
                  [mc_dataset_name, year, data_tier, path],
                  mc_datasets[mc_dataset_name][year][data_tier][path])
  return mc_datasets_selected, mc_datasets_non_selected
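# Illustrative shape of the multiple_selection argument (an assumption based on
# how it is read above, not taken from the original module): it maps the search
# string returned by datasets.get_mc_dataset_search_string to a dict whose
# 'selected_paths' lists the paths to keep when a (name, year, data_tier) entry
# has more than one path. The search string and path below are placeholders.
def _example_multiple_selection():
  return {
      '/TTJets*/MiniTag*/MINIAODSIM': {
          'selected_paths': ['/TTJets/MiniTag/MINIAODSIM'],
      },
  }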