def ROOT2json(cfg_parser):
    """
    Convert the data stored in ROOT files into .json files for plotting.

    Increases ease-of-use for making Data/MC plots (no need to re-process
    the ROOT file just to make a single plot).  For every input ROOT file
    one json file per variable is written to
    ``<json path>/<lepton>/<var>_<jsonfilename>_<sample>.json`` and the
    file-name template (with a literal ``%s`` standing in for the variable)
    is appended to ``share/jsonfiles2plot.txt``.

    Exits the interpreter with status 1 if the list of input ROOT files
    is empty.

    @param cfg_parser    Object which parsed the configuration file
    """
    logging.info("")
    logging.critical(" -- In root2json.py")
    logging.info(" ------------ ")
    logging.info(" Initializing the config file.")

    ## -- Configuration -- ##
    p_varList    = cfg_parser.get('datamc', 'vars')          # ex. 'share/varNames.txt'
    p_inputfiles = cfg_parser.get('datamc', 'rootfiles')     # ex. 'share/datamc_ntuples.txt'
    p_lepton     = cfg_parser.get('datamc', 'lepton')        # ex. muel (both muon & electron)
    p_outfile    = cfg_parser.get('datamc', 'jsonfilename')  # ex. 'elLHMedium_pre_A1_1tagin'
    ## ------------------- ##

    treename = info.treename()

    savePath = "{0}/{1}".format(info.getJsonPath(), p_lepton)
    if not os.path.isdir(savePath):
        os.makedirs(savePath)
    logging.info(" Set the output path: {0}".format(savePath))

    ## -- Load various files of data
    inputfiles = info.read_txt_file(p_inputfiles)
    if not inputfiles:
        # single-argument print(...) behaves the same in Python 2 and 3
        print("")
        print(" ERROR: File {0} is empty (no files!) ".format(p_inputfiles))
        print("")
        import sys
        sys.exit(1)

    # Variables may be listed with an index, e.g. jet_pt[0]; only the base
    # name (jet_pt) is exported, once, in first-seen order.
    varList = []
    for listed_var in info.read_txt_file(p_varList):
        base_name = listed_var.split('[')[0]
        if base_name not in varList:
            varList.append(base_name)

    ## -- Loop over input files
    logging.info(" inputfiles = {0}".format(inputfiles))

    # Keeps track of whether a sample has been used before: the list of
    # input files may have multiple root files for the same sample
    # (ttbar, wjets, etc.), and those share one DataMC object.
    logged_files = {}

    ## -- Make a simple text file that stores all of the json files we just made
    # 'with' guarantees the listing is flushed and closed even if one of the
    # input files raises part-way through the loop.
    with open("share/jsonfiles2plot.txt", "w") as newfile:
        for p in inputfiles:
            jsonData = config.AutoVivification()

            # NOTE(review): the TFile is never closed here; left as-is because
            # it is not visible whether addData keeps references to the tree.
            p_file = ROOT.TFile.Open(p)
            p_tree = p_file.Get(treename)
            p_tree.GetEntry(0)  # just to get the mcChannelNumber
            name = config.getSampleName(root_tree=p_tree,
                                        dsid=p_tree.mcChannelNumber)['name']

            ## -- load the new DataMC object (or re-use the existing one)
            if name not in logged_files:
                entry = DataMC_Type(name)
                logged_files[name] = entry
                for var in varList:
                    entry.varVals[var]      = []
                    entry.scaleFactors[var] = []
                    entry.lepCharges[var]   = []
                    entry.lepNames[var]     = []
            else:
                entry = logged_files[name]

            print("\n ++ Producing json file from {0}\n".format(p))
            logging.info(" ++ Running {0}".format(name))

            ## -- Attach the data (values,weights) to each DataMC object
            entry = addData.addData(entry, p_tree, varList, cfg_parser)  # Get data from ROOT

            logging.info(" Exporting data to json format.")

            ## -- Log the DataMC object in the dictionary
            logged_files[name] = entry

            ## Save each json file now that we have looped over the file
            logging.info(" Saving json information.")

            # The '%s' is deliberately left unformatted: it is filled with the
            # variable name below, and the template itself is what gets
            # recorded in the listing file.
            # NOTE(review): one line is written per input file, so a sample
            # split over several ROOT files is listed more than once.
            outfile_name = '{0}/%s_{1}_{2}.json'.format(savePath, p_outfile, name)
            newfile.write("%s\n" % outfile_name)

            for var in varList:
                # put information in dictionaries to be saved to json
                jsonData[var][entry.name]               = entry.varVals[var]
                jsonData[var][entry.name + '_weight']   = entry.scaleFactors[var]
                jsonData[var][entry.name + '_lepNames'] = entry.lepNames[var]
                jsonData[var]['LUMI']                   = info.LUMI()

                var_outfile = outfile_name % (var)
                print(" Saving output to {0}".format(var_outfile))
                # log the resolved file name (previously the raw '%s' template)
                logging.info(" -- Saving output to {0}".format(var_outfile))
                with open(var_outfile, 'w') as outputfile:
                    json.dump(jsonData[var], outputfile)

    logging.info(" End root2json.py")

    return
def main(self,parser):
    """
    Main function in the datamc class -- does all the directing
    for reading/writing of data from ROOT to json files, and
    plotting histograms.

    Reads the 'datamc' section of the configuration, stores the options
    on the instance, and then runs, in order and as requested:
    ROOT -> json conversion, json merging, and histogram plotting.
    Exits the interpreter with status 1 if none of the three steps
    is enabled.

    @param parser    Object which parsed the configuration file
    """
    ## -- Configuration -- ##
    self.p_vars_file  = parser.get('datamc', 'vars')                     # ex. share/varNames.txt
    self.p_jsonoutput = str2bool(parser.get('datamc', 'makejsonfile'))   # ex. False
    self.p_plotoutput = str2bool(parser.get('datamc', 'makeplot'))       # ex. True
    self.p_mergejson  = str2bool(parser.get('datamc', 'mergejson'))      # ex. True
    self.p_outfile    = parser.get('datamc', 'jsonfilename')             # ex. pre
    self.p_lepton     = parser.get('datamc', 'lepton')                   # ex. muel
    ## ------------------- ##

    ## -- Sanity check -- ##
    if not any((self.p_plotoutput, self.p_jsonoutput, self.p_mergejson)):
        # single-argument print(...) behaves the same in Python 2 and 3
        nothing_to_do = (
            "",
            " You have specified that you don't want to ",
            " make json outputs, don't want to make ",
            " plots, and don't want to merge json files. ",
            " There's nothing left to do. ",
            " Exiting. ",
            "",
        )
        for message_line in nothing_to_do:
            print(message_line)
        sys.exit(1)
    ## ------------------ ##

    self.p_varlist = info.read_txt_file(self.p_vars_file)  # variables from text file

    # For variables that may have [N], e.g., jet_pt[0], make a list
    # that just contains the name, e.g., jet_pt.
    # The plan is to make a single json file for jet_pt, but only
    # plot [N], or [N+1] (e.g., the user specifies both jet_pt[0] and
    # jet_pt[1] in the text file).
    # De-duplicate in first-seen order so the result is reproducible
    # (a plain set() would give an arbitrary order).
    already_seen = set()
    self.p_varlist_nolead = []
    for p_var in self.p_varlist:
        base_name = p_var.split('[')[0]
        if base_name not in already_seen:
            already_seen.add(base_name)
            self.p_varlist_nolead.append(base_name)

    logging.info(" -- In file dataMC.py")
    logging.info(" -- Make json output: {0} ".format(self.p_jsonoutput))

    ## -- Convert ROOT to JSON -- ##
    if self.p_jsonoutput:
        # imported only when the conversion step is actually requested
        import pyDataMC.root2json as root2json

        logging.info(" > Specified json output ")
        logging.info(" Will produce json files and then plots automatically ")
        logging.info(" -- Making json output")
        print("\n -- Converting ROOT to json output -- \n")

        ## Making json output, and then making plots (one step is easier...)
        root2json.ROOT2json(parser)

    ## -- Merge json files -- ##
    if self.p_mergejson:
        ## merge before plotting!!
        self.merge_json_files()

    ## -- Plot Histograms -- ##
    if self.p_plotoutput:
        # imported only when plotting is actually requested
        from pyDataMC.json2hist import DataMCPlotter

        logging.info(" -- Making plots from json output")
        print("\n -- Producing figures -- \n")

        plotter = DataMCPlotter(parser)
        plotter.initialize()
        # plot every variable exactly as listed, including any [N] index
        for var in self.p_varlist:
            print(" ++ Plotting {0} ++\n".format(var))
            logging.info(" ++ Plotting {0} ++".format(var))
            plotter.datamcplotter(var)

    logging.info(" Finished datamc.py class DataMC")

    return