class ToyDataSetManager(RootHelperBase): def __init__(self): """Initialize whatever is needed""" self.my_logger = Logger() self.log = self.my_logger.getLogger(self.__class__.__name__, 10) # } self.DEBUG = self.my_logger.is_debug() self.pp = pprint.PrettyPrinter(indent=4) # initialize RooFit gSystem.Load("libHiggsAnalysisCombinedLimit.so") self.output_filename = "worskapce_with_embedded_toys.root" def set_toys_path(self, toys_path): """ Set the path for the toy dataset.There is aleays one active toy held in self.toys. """ self.toys_path = toys_path self.toys = self.get_object(path=toys_path, object_type=RooAbsData, clone=False) def set_workspace_path(self, ws_path): """ Set the path for the workspace where toys will be included. There is only one workspace that can be active in the class. """ self.ws = self.get_object(path=ws_path, object_type=RooWorkspace, clone=False) def set_output_file_name(self, output_filename): """ Set the name of the output root file. """ self.output_filename = output_filename def set_new_toys_name(self, new_toys_name): """ Set name for toys in the workspace """ self.new_toys_name = new_toys_name def import_toys_to_ws(self, ws_path=None, toys_path=None, output_filename=None, new_toys_name=None): """ Imports a given toys dataset (or multiple toys) into the workspace and dumps to new root file. Parameters: ----------- ws_path : path to exisitng workspace (string) toys_path : path or list of paths to toys.TODO add regexp parsing to import matching toys. output_filename : file name of the output workspace new_toys_name : in case of one toy import, a new name can be set. In case of list, the name is set to be the same as in the source file. Returns: -------- Returns 0 in case it goes trough without erorrs(?). """ # TODO set checks for the input provided if ws_path: self.set_workspace_path(ws_path) if output_filename: self.set_output_file_name(output_filename) if new_toys_name: self.set_new_toys_name(new_toys_name) try: self.ws except AttributeError: raise AttributeError, "You need to provide workspace path." if toys_path: toys_path_list = [] if isinstance(toys_path, list): toys_path_list = toys_path elif isinstance(toys_path, str): toys_path_list = [toys_path] for the_toy in toys_path_list: self.set_toys_path(the_toy) toys_name = self.get_paths(the_toy)[-1] # just getthe name of toys object in the root file. self.log.info("Setting toys name in workspace to: {0}".format(toys_name)) self.set_new_toys_name(toys_name) self.toys.SetName(self.new_toys_name) getattr(self.ws, "import")(self.toys) self.log.info( "Imported DataSet '{0}' into workspace '{1}'.".format(self.toys.GetName(), self.ws.GetName()) ) else: try: self.toys except AttributeError: raise AttributeError, "You need to provide toys path." try: self.new_toys_name except AttributeError: toys_name = self.get_paths(self.toys_path)[-1] # just getthe name of toys object in the root file. self.log.info("Setting toys name in workspace to: {0}".format(toys_name)) self.set_new_toys_name(toys_name) self.toys.SetName(self.new_toys_name) getattr(self.ws, "import")(self.toys) self.log.info("Imported DataSet '{0}' into workspace '{1}'.".format(self.toys.GetName(), self.ws.GetName())) self.ws.data(self.toys.GetName()).Print() self.ws.data(self.toys.GetName()).Print("v") # write workspace self.ws.writeToFile(self.output_filename) self.log.info("Writing workspace '{0}' to file {1}".format(self.ws.GetName(), self.output_filename)) return 0 def set_dataset_name(self, dataset_name): """ Set name of the dataset in workspace. """ self.dataset_name = dataset_name def import_dataset_to_ws(self, dataset, workspace, output_filename=None, new_name=None): """ Import dataset to worspace workspace. """ if new_name: dataset.SetName(new_name) if output_filename: self.set_output_file_name(output_filename) self.log.info( "Imported DataSet '{0}' into workspace '{1}' and written to file {2}.".format( dataset.GetName(), workspace.GetName(), self.output_filename ) ) pass def set_workspace(self, workspace): """ Provide workspace from path naload it to self.ws or provide directly workspace and load it to self.ws """ if isinstance(workspace, RooWorkspace): self.ws = workspace self.log.debug("Loaded in workspace {0}.".format(self.ws.GetName())) elif isinstance(workspace, str): self.set_workspace_path(self, workspace) self.log.debug("Loaded in workspace {0} from path: ".format(workspace)) def dump_datasets_to_file(self, output_filename=None, access="RECREATE"): """ Write all datasets collected in the basket(RootHelperBase) to a file. """ if output_filename: self.set_output_file_name(output_filename) self.dump_basket_to_file(self.output_filename, access) self.log.info("All items from the basket have been written to file: {0}".format(self.output_filename)) return 0 def get_dataset_from_tree( self, path_to_tree, tree_variables, weight="1==1", weight_var_name=0, dataset_name="my_dataset", basket=True, category=None, ): """ Creates RooDataSet from a plain root tree given: - variables name list - weight expression. It works in the same way as TTree cut. Returns: -------- - RooDataSet - also fills the basket with datasets (basket inhereted from RootHelperBase class) TODO ---- - add implementation for category setting(check in prepare_toy_datasets_for_sync) - check if adding toy dataset to each channel workspace individually behaves well after combineCards.py. """ # make RooRealVars from tree_variables my_arg_set = RooArgSet() my_rrv = dict() for var_name in tree_variables: # TODO implement check that branch exist my_rrv[var_name] = RooRealVar(var_name, var_name, -999999999, 999999999) my_arg_set.add(my_rrv[var_name]) if self.DEBUG: self.log.debug("RooArgSet is now:") my_arg_set.Print() # get the tree from path_to_tree my_tree = self.get_TTree(path_to_tree, cut=weight) self.log.debug("Selected tree contains {0} events".format(my_tree.GetEntries())) # create RooDataSet and reduce tree if needed # self.dataset_from_tree = RooDataSet(dataset_name, dataset_name, my_tree, my_arg_set, weight).reduce(my_arg_set) self.dataset_from_tree = RooDataSet(dataset_name, dataset_name, my_tree, my_arg_set) # self.dataset_from_tree = RooDataSet(dataset_name, dataset_name, my_tree, my_arg_set, "", weight_var_name) # data[j]=new RooDataSet(Form("data%d",j),Form("data%d",j),outTree,RooArgSet(rCMS_zz4l_widthKD,rCMS_zz4l_widthMass,rweightFit),"","_weight_"); self.log.debug("RooDataSet contains {0} events".format(self.dataset_from_tree.sumEntries())) # .reduce(ROOT.RooArgSet(self.D0)) self.current_arg_set = my_arg_set # add dataset to basket if basket: self.add_to_basket(self.dataset_from_tree, new_name=dataset_name, new_title=dataset_name) return self.dataset_from_tree def get_current_arg_set(self): """ Return last dataset setup used by get_dataset_from_tree(). """ return self.current_arg_set
class RootHelperBase(object): """ Class that helps to pick any object from any root file by specifiying the path to the object like: path/to/root_file.root/path/to/root_object. Class can also check for type of object and return TypeError in case the object is not of a desired type. """ def __init__(self): self.my_logger = Logger() self.log = self.my_logger.getLogger(self.__class__.__name__, 10) self.DEBUG = self.my_logger.is_debug() self.pp = pprint.PrettyPrinter(indent=4) def check_in_opened_files_table(self, file_name,access): """ Makes a check in the open_files_table dictionary and returns pointer to the file if file is opened correct access mode. Returns: -------- tuple (is_opened, pointer_to_file). In case file doesn't exist (is_opened=False,pointer_to_file=None) """ try: self.open_files_table[file_name][access] except KeyError: self.log.debug('File {0} is not opened in {1} mode.'.format(file_name, access)) return (False, None) else: the_file = self.open_files_table[file_name][access] if isinstance(the_file,TFile): if (the_file.IsOpen() and not the_file.IsZombie()): self.log.debug('File {0} is already opened in {1} mode. Return pointer to file.'.format(file_name, access)) return (True, the_file) else: self.log.debug('File {0} in {1} mode is either closed or zombie.'.format(file_name, access)) self.open_files_table[file_name][access] = None return (False, None) else: self.log.debug('File {0} is not opened in {1} mode.'.format(file_name, access)) return (False, None) def update_opened_files_table(self, file_object, access): """ Update the status of files opened. file_name: acces : file_pointer structure. """ try: self.open_files_table except AttributeError: #self.open_files_table = collections.OrderedDict() self.open_files_table = {} self.open_files_table[file_object.GetName()] = {access : file_object} else: try: self.open_files_table[file_object.GetName()] except KeyError: self.open_files_table[file_object.GetName()] = {access : file_object} else: self.open_files_table[file_object.GetName()].update({access : file_object}) #self.open_files_table['dummy'].update({access : file_object}) #self.open_files_table[file_object.GetName()][access] = file_object if self.DEBUG: self.pp.pprint(self.open_files_table) return 0 def TFile_safe_open(self, file_name, access = 'READ'): """ Safely open TFile object. Memory is saved by cheking if the file is already open by looking up in the list open_files_table. """ #check if file is already openedby looking-up the opend files dict is_opened=False rootfile= None try: self.open_files_table except AttributeError: pass else: is_opened, rootfile = self.check_in_opened_files_table(file_name,access) if is_opened: self.log.debug('Returning pointer to ROOT file: {0}'.format(file_name)) return rootfile self.log.debug('Opening ROOT file: {0}'.format(file_name)) if access.upper() == 'READ' and not os.path.exists(file_name): raise IOError, 'File path does not exist: {0}'.format(file_name) else: base_dir = os.path.dirname(file_name) misc.make_sure_path_exists(base_dir) rootfile = TFile.Open(file_name,access) self.update_opened_files_table(rootfile, access) if not rootfile: raise IOError, 'The file {0} either doesn\'t exist or cannot be open'.format(file_name) return rootfile def get_paths(self, path): """ Returns tuple (path_to_root_file, path_to_root_object_in_root_file) """ path_contains_file = ('.root' in path) path_segments = path.split('.root') if path.endswith('.root'): #only root file path exists return (path,"") #print path_segments #assert 1<len(path_segments)<=2, 'Path should be in format <path/to/dir>root_object_file.root/path_to_root_object_in_file' assert 0<len(path_segments)<=2, 'Path should be in format <path/to/dir>root_object_file.root/path_to_root_object_in_file' path_to_file = "" if len(path_segments)==2: #path contains file name and object path in the root file path_to_file = path_segments[0]+'.root' self.log.debug('Src root file: {0}'.format(path_to_file )) #path_to_root_object = string.join(path_segments[-1].split('/')[1:],'/') #to remove the '/' after .root if path_segments[-1].startswith('/'): path_to_root_object = path_segments[-1][1:] #to remove the '/' after .root else: path_to_root_object = path_segments[-1] #there is no '/' at the beggining self.log.debug('Src root_object name: {0}'.format(path_to_root_object)) return (path_to_file,path_to_root_object) #path_to_file = path_segments[0]+'.root' #self.log.debug('Src root file: {0}'.format(path_to_file )) #path_to_root_object = string.join(path_segments[-1].split('/')[1:],'/') #to remove the '/' after .root #self.log.debug('Src root_object name: {0}'.format(path_to_root_object)) #return (path_to_file,path_to_root_object) def get_object(self, path, object_type=None, clone=False): """ Get any root object copy from path and check it's type. The object is copied from the file if needed. """ path_to_file, path_to_root_object = self.get_paths(path) root_object_file = self.TFile_safe_open(path_to_file, 'READ') the_object = root_object_file.Get(path_to_root_object) is_TTree = isinstance(the_object,TTree) if clone: if not is_TTree: the_object = copy.deepcopy(root_object_file.Get(path_to_root_object)) self.log.debug('Coping root_object {0} of type={1}.'.format(path_to_root_object, type(the_object))) root_object_file.Close() else: #FIXME self.log.warn('Cloning the full tree {0}. !!! Still not fully tested !!!'.format(path_to_root_object)) the_object = root_object_file.Get(path_to_root_object).CloneTree() #will not close file since it will destroy the object. Better to write the tree down first, then close file. else: self.log.debug('Pointer to root_object {0} of type={1} is returned.'.format(path_to_root_object, type(the_object))) return the_object def get_TTree(self,path , cut = None, clone = False): """ Get a tree from the path of format //machine/file_name.root/subdir/tree_name. If path is list it will asume TChain. Wildcards can be used but ".root" has to exost in the path name, otherwise 'segmentation violation' """ the_tree = TChain() if isinstance(path, list): tree_name = self.get_paths(path[0])[1] the_tree.SetName(tree_name) for item in path: assert isinstance(item,str),'The tree path should be of string format and not: {0}'.format(type(item)) add_result = the_tree.Add(item) elif isinstance(path, str): tree_name = self.get_paths(path)[1] the_tree.SetName(tree_name) add_result = the_tree.Add(path) self.log.debug('TChain has been constructed from {0} files with correct tree names.'.format(add_result)) if cut: assert isinstance(cut, str), 'The TTree cut has to be string value, not {0} !!!'.format(type(cut)) clone = True the_selection_tree = the_tree.CopyTree(cut) return the_selection_tree else: return the_tree def get_histogram(self,path, hist_type = TH1, clone = False): """ Get TH1 object or any other that inherits from TH1 """ return self.get_object(path,hist_type, clone) def get_embedded_object(self, path_to_container, container_type = None, embedded_object = None, object_type = None, clone = False): """ Get an object embedded into another class, like e.g. a TH1 from TCanvas saved in file. In case only path_to_container is given, it will return the container like with get_object method. """ pass def add_to_basket(self,root_object, new_name = None, new_title = None): """ Add object to the basket with new_name and new_title. If new_name contains "/" then a directory will be created inside the file. (TODO) """ if new_name: #name_in_basket = new_name new_name_no_subfolders = new_name.split('/')[-1] #remove subfolder name from the new_name root_object.SetName(new_name_no_subfolders) name_in_basket = new_name else: name_in_basket = root_object.GetName() if new_title: root_object.SetTitle(new_title) try: self.root_fruit_basket except AttributeError: self.root_fruit_basket = collections.OrderedDict() self.log.debug('Creating new root-object basket.') else: if self.DEBUG and len(self.root_fruit_basket)<10: self.log.debug('Adding root-object to existing basket. Basket state (printed if less then 10 items):') self.pp.pprint(self.root_fruit_basket) self.root_fruit_basket[name_in_basket] = root_object def _get_subfolders_and_name(self,path): """ Gives back the 2 element tuple with subfolder path and a name of root_object """ path_segments = path.split('/') assert len(path_segments)>0, 'The name should not be empty string.' if len(path_segments) > 1: #check if first is '/' if path_segments[0]=='': path_segments.pop(0) subfolders = string.join(path_segments[:-1],'/') root_object_name = path_segments[-1] self.log.debug('Root-subfolder: {0}'.format(subfolders)) self.log.debug('Root-object name: {0}'.format(root_object_name)) return (subfolders, root_object_name) else: root_object_name = path_segments[-1] return (None, root_object_name) def _get_directory(self,root_file, path): """ Create and cd to the directory if given like a/b/c """ root_file.cd() #subfolders = self._get_subfolders_and_name(path)[0] if path: self.log.debug('Creating root-subfolder {0}'.format(path)) mkdir_res = root_file.mkdir(path) self.log.info('Root-subfolder {0} created with code = {1}'.format(path, mkdir_res)) root_file.cd(path) else: #no subfolder will be created root_file.cd() self.log.debug('Current directory: {0}'.format(gDirectory.GetPath())) def flush_basket(self): """ Resets the basket content and delets the basket. """ try: del self.root_fruit_basket except: raise RuntimeError, 'Basket cannot be flushed and destroyed! It even doesn\'t exist ...' else: self.log.info('Basket flushed!') return 0 def dump_basket_to_file(self, file_name, access = 'UPDATE'): """ Save what is in basket to a file. Create directories in the path if needed. """ out_file = self.TFile_safe_open(file_name, access) out_file.cd() if self.DEBUG: self.log.debug('Dumping basket to file.') self.pp.pprint(self.root_fruit_basket) for item_name in self.root_fruit_basket.keys(): subfolders, root_object_name = self._get_subfolders_and_name(item_name) self._get_directory(out_file, subfolders) #it will create and cd to the directory if given like a/b/c self.log.debug('Writing root-object: {0} Object name: {1} ; Object title: {2}'.format(self.root_fruit_basket[item_name],self.root_fruit_basket[item_name].GetName(),self.root_fruit_basket[item_name].GetTitle())) is_TTree = isinstance(self.root_fruit_basket[item_name],TTree) if is_TTree: self.log.debug('This is a TTree object : {0}'.format(self.root_fruit_basket[item_name])) copy_tree_name = self.root_fruit_basket[item_name].GetName() copy_tree_title = self.root_fruit_basket[item_name].GetTitle() tree_for_saving = self.root_fruit_basket[item_name].CloneTree(0) copy_res = tree_for_saving.CopyEntries(self.root_fruit_basket[item_name]) tree_for_saving.SetNameTitle(copy_tree_name,copy_tree_title) write_res = tree_for_saving.Write() else: write_res = self.root_fruit_basket[item_name].Write() if write_res == 0 : self.log.error('The object {0} cannot be written into {1}'.format(item_name, gDirectory.GetPath())) else: self.log.info('The object {0} has been written into {1}'.format(item_name, gDirectory.GetPath())) out_file.Close() self.log.info('Saved the basket with {1} items into the file: {0}'.format(file_name, len(self.root_fruit_basket))) self.flush_basket() return 0