def read(scan, param, files, chop, remove_near_min, rezero,
         remove_delta=None, improve=False, remove_dups=True):
    goodfiles = [f for f in files if plot.TFileIsGood(f)]
    limit = plot.MakeTChain(goodfiles, 'limit')
    graph = plot.TGraphFromTree(
        limit, param, '2*%s' % DELTANLL, 'quantileExpected > -1.5')
    graph.SetName(scan)
    graph.Sort()
    if remove_dups:
        plot.RemoveGraphXDuplicates(graph)
    if remove_delta is not None:
        plot.RemoveSmallDelta(graph, remove_delta)
    plot.RemoveGraphYAbove(graph, chop)
    plot.ReZeroTGraph(graph, rezero)
    if remove_near_min is not None:
        plot.RemoveNearMin(graph, remove_near_min)
    if improve:
        global NAMECOUNTER
        spline = ROOT.TSpline3("spline3", graph)
        func = ROOT.TF1('splinefn' + str(NAMECOUNTER), partial(Eval, spline),
                        graph.GetX()[0], graph.GetX()[graph.GetN() - 1], 1)
        func.SetNpx(NPX)
        NAMECOUNTER += 1
        plot.ImproveMinimum(graph, func, True)
    if FILTER is not None:
        plot.FilterGraph(graph, FILTER)
    if REMOVE_X_RANGES is not None:
        for remove_x in REMOVE_X_RANGES:
            plot.RemoveInXRange(graph, remove_x[0], remove_x[1])
    return graph
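
# A minimal, self-contained sketch of the TSpline3-in-TF1 pattern used by the
# 'improve' branch above, which lets plot.ImproveMinimum refine the minimum
# between the scanned points. Assumes PyROOT is available; '_eval_spline'
# mirrors the module-level 'Eval' helper the function above relies on, and
# the graph values are illustrative.
from array import array
from functools import partial

import ROOT


def _eval_spline(obj, x, params):
    # TF1 invokes the callable with (x, params); only x[0] is needed here
    return obj.Eval(x[0])


_xs = array('d', [0.0, 0.5, 1.0, 1.5, 2.0])
_ys = array('d', [4.0, 1.0, 0.0, 1.2, 4.5])
_graph = ROOT.TGraph(len(_xs), _xs, _ys)
_spline = ROOT.TSpline3('spline3', _graph)
# One dummy parameter, matching the TF1 construction above
_func = ROOT.TF1('splinefn_demo', partial(_eval_spline, _spline),
                 _graph.GetX()[0], _graph.GetX()[_graph.GetN() - 1], 1)
print(_func.Eval(0.75))  # interpolated 2*deltaNLL between the scan points
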
def read(scan, param, files, chop, remove_near_min, rezero,
         remove_delta=None, improve=False):
    goodfiles = [f for f in files if plot.TFileIsGood(f)]
    limit = plot.MakeTChain(goodfiles, 'limit')
    # Require quantileExpected > -0.5 to avoid the final point, which is
    # always committed twice (even if the fit fails)
    graph = plot.TGraphFromTree(limit, param, '2*deltaNLL',
                                'quantileExpected > -0.5')
    graph.SetName(scan)
    graph.Sort()
    plot.RemoveGraphXDuplicates(graph)
    if remove_delta is not None:
        plot.RemoveSmallDelta(graph, remove_delta)
    plot.RemoveGraphYAbove(graph, chop)
    plot.ReZeroTGraph(graph, rezero)
    if remove_near_min is not None:
        plot.RemoveNearMin(graph, remove_near_min)
    if improve:
        global NAMECOUNTER
        spline = ROOT.TSpline3("spline3", graph)
        func = ROOT.TF1('splinefn' + str(NAMECOUNTER), partial(Eval, spline),
                        graph.GetX()[0], graph.GetX()[graph.GetN() - 1], 1)
        NAMECOUNTER += 1
        plot.ImproveMinimum(graph, func, True)
    return graph
def read(scan, param, files, ycut):
    goodfiles = [f for f in files if plot.TFileIsGood(f)]
    limit = plot.MakeTChain(goodfiles, 'limit')
    graph = plot.TGraphFromTree(limit, param, '2*deltaNLL',
                                'quantileExpected > -1.5')
    graph.SetName(scan)
    graph.Sort()
    plot.RemoveGraphXDuplicates(graph)
    plot.RemoveGraphYAbove(graph, ycut)
    return graph
def run_method(self):
    limit_sets = defaultdict(list)
    for filename in self.args.input:
        if not plot.TFileIsGood(filename):
            print '>> File %s is corrupt or incomplete, skipping' % filename
            continue
        if self.args.use_dirs is False:
            limit_sets['default'].append(filename)
        else:
            label = 'default'
            dirs = filename.split('/')
            # The last dir could be the mass, if so we ignore it and check the next
            if len(dirs) > 1:
                if not isfloat(dirs[-2]):
                    label = dirs[-2]
                elif len(dirs) > 2:
                    label = dirs[-3]
            limit_sets[label].append(filename)

    for label, filenames in limit_sets.iteritems():
        js_out = {}
        for filename in filenames:
            file = ROOT.TFile(filename)
            tree = file.Get('limit')
            for evt in tree:
                mh = str(evt.mh)
                if mh not in js_out:
                    js_out[mh] = {}
                if evt.quantileExpected == -1:
                    js_out[mh]['obs'] = evt.limit
                elif abs(evt.quantileExpected - 0.5) < 1E-4:
                    js_out[mh]["exp0"] = evt.limit
                elif abs(evt.quantileExpected - 0.025) < 1E-4:
                    js_out[mh]["exp-2"] = evt.limit
                elif abs(evt.quantileExpected - 0.160) < 1E-4:
                    js_out[mh]["exp-1"] = evt.limit
                elif abs(evt.quantileExpected - 0.840) < 1E-4:
                    js_out[mh]["exp+1"] = evt.limit
                elif abs(evt.quantileExpected - 0.975) < 1E-4:
                    js_out[mh]["exp+2"] = evt.limit
        jsondata = json.dumps(js_out, sort_keys=True, indent=2,
                              separators=(',', ': '))
        if self.args.output is not None:
            outname = self.args.output.replace(
                '.json', '_%s.json' % label) if self.args.use_dirs else self.args.output
            with open(outname, 'w') as out_file:
                print '>> Writing output %s from files:' % outname
                pprint.pprint(filenames, indent=2)
                out_file.write(jsondata)
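
# Standalone sketch of the label-from-directory logic above. The 'isfloat'
# helper is assumed to be provided elsewhere in this module; it is written
# out here so the example runs on its own, and the paths are illustrative.
def _isfloat(value):
    try:
        float(value)
        return True
    except ValueError:
        return False


def _label_for(filename):
    label = 'default'
    dirs = filename.split('/')
    # The last dir could be the mass, if so ignore it and check the next
    if len(dirs) > 1:
        if not _isfloat(dirs[-2]):
            label = dirs[-2]
        elif len(dirs) > 2:
            label = dirs[-3]
    return label


print(_label_for('cards/hww/125/higgsCombine.Test.root'))  # -> 'hww'
print(_label_for('cards/htt/higgsCombine.Test.root'))      # -> 'htt'
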
def read(scan, param_x, param_y, file):
    goodfiles = [f for f in [file] if plot.TFileIsGood(f)]
    limit = plot.MakeTChain(goodfiles, 'limit')
    graph = plot.TGraph2DFromTree(limit, param_x, param_y, '2*deltaNLL',
                                  'quantileExpected > -0.5 && deltaNLL > 0')
    best = plot.TGraphFromTree(limit, param_x, param_y,
                               'quantileExpected > -0.5 && deltaNLL == 0')
    plot.RemoveGraphXDuplicates(best)
    assert best.GetN() == 1
    graph.SetName(scan)
    best.SetName(scan + '_best')
    return (graph, best)
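
# Hypothetical usage of the 2D reader above: 'scan_2d.root' and the POI names
# are placeholders, not files or parameters this package necessarily ships.
# The contract is that exactly one tree entry has deltaNLL == 0 (the best fit),
# which the assert above enforces.
graph2d, best = read('scan', 'r_ggH', 'r_bbH', 'scan_2d.root')
print('Best fit: r_ggH = %g, r_bbH = %g' % (best.GetX()[0], best.GetY()[0]))
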
def read(scan, param, other_param, files, remove_dups=True):
    goodfiles = [f for f in files if plot.TFileIsGood(f)]
    limit = plot.MakeTChain(goodfiles, 'limit')
    graph = plot.TGraphFromTree(limit, param, other_param,
                                'quantileExpected > -0.5')
    graph.SetName(scan)
    graph.Sort()
    if remove_dups:
        plot.RemoveGraphXDuplicates(graph)
    return graph
def ReadScanFromTFiles(self, filenames, param_name,
                       tree_selection='quantileExpected > -1.5'):
    # TODO: should report bad files here
    goodfiles = [f for f in filenames if plotting.TFileIsGood(f)]
    if len(goodfiles) == 0:
        raise RuntimeError('[ReadScanFromTFiles] no valid TFiles')
    limit = plotting.MakeTChain(goodfiles, 'limit')
    graph = plotting.TGraphFromTree(limit, param_name, '2*deltaNLL',
                                    tree_selection)
    graph.Sort()
    if self.verbosity >= 2:
        print '[ReadScanFromTFiles] Produced TGraph:'
        graph.Print()
    return graph
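
# Hypothetical usage of ReadScanFromTFiles: glob the MultiDimFit outputs of a
# likelihood scan for POI 'r' and build the 2*deltaNLL graph. The filenames
# are placeholders and 'helper' stands for an instance of the class above.
import glob

scan_files = glob.glob('higgsCombine.scan.r.*.MultiDimFit.mH125.root')
scan_graph = helper.ReadScanFromTFiles(scan_files, 'r')
print('Scan graph has %i points' % scan_graph.GetN())
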
def run_method(self):
    limit_sets = defaultdict(list)
    for filename in self.args.input:
        if not plot.TFileIsGood(filename):
            print '>> File %s is corrupt or incomplete, skipping' % filename
            continue
        if not self.args.use_dirs:
            if 'default' not in limit_sets:
                limit_sets['default'] = ([], [])
            limit_sets['default'][0].append(filename)
        else:
            label = 'default'
            dirs = filename.split('/')
            # The last dir could be the mass, if so we ignore it and check the next
            if len(dirs) > 1:
                if not isfloat(dirs[-2]):
                    label = dirs[-2]
                elif len(dirs) > 2:
                    label = dirs[-3]
            if label not in limit_sets:
                limit_sets[label] = ([], [])
            limit_sets[label][0].append(filename)

    for label, (filenames, toyfiles) in limit_sets.iteritems():
        js_out = {}
        for filename in filenames:
            file = ROOT.TFile(filename)
            tree = file.Get('limit')
            adding_cat_branch = False
            branches = []
            for branch in tree.GetListOfBranches():
                # Current logic says any branch after quantileExpected is a special
                # GOF branch labelled according to category
                if adding_cat_branch:
                    branches.append(branch.GetName())
                if branch.GetName() == 'quantileExpected':
                    adding_cat_branch = True
            for evt in tree:
                mh = str(evt.mh)
                if mh not in js_out:
                    js_out[mh] = {}
                if evt.quantileExpected != -1:
                    continue
                if branches:
                    for branch in branches:
                        if branch not in js_out[mh]:
                            js_out[mh][branch] = {}
                            js_out[mh][branch]['toy'] = []
                        if evt.iToy <= 0:
                            js_out[mh][branch]['obs'] = [getattr(evt, branch)]
                        else:
                            js_out[mh][branch]['toy'].append(
                                getattr(evt, branch))
                else:
                    if 'toy' not in js_out[mh]:
                        js_out[mh]['toy'] = []
                    if evt.iToy <= 0:
                        js_out[mh]['obs'] = [evt.limit]
                    else:
                        js_out[mh]['toy'].append(evt.limit)
        for mh in js_out:
            if all([entry in js_out[mh] for entry in ['toy', 'obs']]):
                js_out[mh]["p"] = float(
                    len([toy for toy in js_out[mh]['toy']
                         if toy >= js_out[mh]['obs'][0]])) / len(js_out[mh]['toy'])
            else:
                for branch in js_out[mh]:
                    js_out[mh][branch]["p"] = float(
                        len([toy for toy in js_out[mh][branch]['toy']
                             if toy >= js_out[mh][branch]['obs'][0]])) / len(js_out[mh][branch]['toy'])
        jsondata = json.dumps(js_out, sort_keys=True, indent=2,
                              separators=(',', ': '))
        if self.args.output is not None:
            outname = self.args.output.replace(
                '.json', '_%s.json' % label) if self.args.use_dirs else self.args.output
            with open(outname, 'w') as out_file:
                print '>> Writing output %s from files:' % outname
                pprint.pprint(filenames, indent=2)
                out_file.write(jsondata)
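
# The goodness-of-fit p-value computed above is simply the fraction of toy
# test statistics at least as extreme as the observed one. A standalone
# sketch with illustrative numbers:
def _gof_p_value(obs, toys):
    if not toys:
        raise ValueError('no toys available to compute a p-value')
    return float(len([t for t in toys if t >= obs])) / len(toys)


print(_gof_p_value(12.3, [8.1, 10.5, 12.9, 15.2, 9.7]))  # -> 0.4
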
def run_method(self):
    limit_sets = defaultdict(list)
    for filename in self.args.input:
        if not plot.TFileIsGood(filename):
            print '>> File %s is corrupt or incomplete, skipping' % filename
            continue
        if self.args.use_dirs is False:
            limit_sets['default'].append(filename)
        else:
            label = 'default'
            dirs = filename.split('/')
            # The last dir could be the mass, if so we ignore it and check the next
            if len(dirs) > 1:
                if not isfloat(dirs[-2]):
                    label = dirs[-2]
                elif len(dirs) > 2:
                    label = dirs[-3]
            limit_sets[label].append(filename)

    for label, filenames in limit_sets.iteritems():
        js_out = {}
        for filename in filenames:
            if plot.TFileIsGood(filename):
                file = ROOT.TFile(filename)
                tree = file.Get('limit')
                for evt in tree:
                    mh = str(evt.mh)
                    if mh not in js_out:
                        js_out[mh] = {}
                        if self.args.toys:
                            # 'obs' must exist before the append for iToy == 0 below
                            js_out[mh]['obs'] = []
                            js_out[mh]['toys'] = {}
                            for limit in ['obs', 'exp0', 'exp-2', 'exp-1',
                                          'exp+1', 'exp+2']:
                                js_out[mh]['toys'][limit] = []
                    if self.args.toys:
                        if evt.iToy > 0:
                            if evt.quantileExpected == -1:
                                js_out[mh]['toys']['obs'].append(evt.limit)
                            elif abs(evt.quantileExpected - 0.5) < 1E-4:
                                js_out[mh]['toys']["exp0"].append(evt.limit)
                            elif abs(evt.quantileExpected - 0.025) < 1E-4:
                                js_out[mh]['toys']["exp-2"].append(evt.limit)
                            elif abs(evt.quantileExpected - 0.160) < 1E-4:
                                js_out[mh]['toys']["exp-1"].append(evt.limit)
                            elif abs(evt.quantileExpected - 0.840) < 1E-4:
                                js_out[mh]['toys']["exp+1"].append(evt.limit)
                            elif abs(evt.quantileExpected - 0.975) < 1E-4:
                                js_out[mh]['toys']["exp+2"].append(evt.limit)
                        elif evt.iToy == 0:
                            if evt.quantileExpected == -1:
                                js_out[mh]['obs'].append(evt.limit)
                    else:
                        if evt.quantileExpected == -1:
                            js_out[mh]['obs'] = evt.limit
                            if self.args.limit_err:
                                js_out[mh]['obs_err'] = evt.limitErr
                        elif abs(evt.quantileExpected - 0.5) < 1E-4:
                            js_out[mh]["exp0"] = evt.limit
                            if self.args.limit_err:
                                js_out[mh]['exp0_err'] = evt.limitErr
                        elif abs(evt.quantileExpected - 0.025) < 1E-4:
                            js_out[mh]["exp-2"] = evt.limit
                            if self.args.limit_err:
                                js_out[mh]['exp-2_err'] = evt.limitErr
                        elif abs(evt.quantileExpected - 0.160) < 1E-4:
                            js_out[mh]["exp-1"] = evt.limit
                            if self.args.limit_err:
                                js_out[mh]['exp-1_err'] = evt.limitErr
                        elif abs(evt.quantileExpected - 0.840) < 1E-4:
                            js_out[mh]["exp+1"] = evt.limit
                            if self.args.limit_err:
                                js_out[mh]['exp+1_err'] = evt.limitErr
                        elif abs(evt.quantileExpected - 0.975) < 1E-4:
                            js_out[mh]["exp+2"] = evt.limit
                            if self.args.limit_err:
                                js_out[mh]['exp+2_err'] = evt.limitErr
        if self.args.toys:
            print "Expected bands will be taken from toys"
            for mh in js_out.keys():
                print mh
                limits = sorted(js_out[mh]['toys']['obs'])
                quantiles = array('d', [0.025, 0.160, 0.5, 0.840, 0.975])
                res = array('d', [0., 0., 0., 0., 0.])
                empty = array('i', [0])
                ROOT.TMath.Quantiles(len(limits), len(quantiles),
                                     array('d', limits), res, quantiles,
                                     True, empty, 1)
                print res
                js_out[mh]['exp-2'] = res[0]
                js_out[mh]['exp-1'] = res[1]
                js_out[mh]['exp0'] = res[2]
                js_out[mh]['exp+1'] = res[3]
                js_out[mh]['exp+2'] = res[4]
        jsondata = json.dumps(js_out, sort_keys=True, indent=2,
                              separators=(',', ': '))
        if self.args.output is not None:
            outname = self.args.output.replace(
                '.json', '_%s.json' % label) if self.args.use_dirs else self.args.output
            with open(outname, 'w') as out_file:
                print '>> Writing output %s from files:' % outname
                pprint.pprint(filenames, indent=2)
                out_file.write(jsondata)
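
# Standalone version of the TMath.Quantiles call above (assumes PyROOT):
# extract the median and +/-1,2 sigma expected bands from a list of toy
# limits. The toy values here are illustrative; type=1 selects the empirical
# (inverse-ECDF) quantile definition used above.
from array import array

import ROOT

_limits = sorted([0.8, 0.9, 0.95, 1.0, 1.05, 1.1, 1.2, 1.3])
_quantiles = array('d', [0.025, 0.160, 0.5, 0.840, 0.975])
_res = array('d', [0.] * len(_quantiles))
_empty = array('i', [0])
ROOT.TMath.Quantiles(len(_limits), len(_quantiles), array('d', _limits),
                     _res, _quantiles, True, _empty, 1)
print(list(_res))  # -> [exp-2, exp-1, exp0, exp+1, exp+2]
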
def run_method(self):
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch(ROOT.kTRUE)

    # Open the json config file
    with open(self.args.config) as json_file:
        cfg = json.load(json_file)

    # Set all the parameter values locally using defaults if necessary
    grids = cfg['grids']
    POIs = cfg['POIs']
    opts = cfg['opts']
    toys_per_cycle = cfg['toys_per_cycle']
    zipname = cfg.get('zipfile', None)
    contours = cfg.get('contours',
                       ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
    min_toys = cfg.get('min_toys', 500)
    max_toys = cfg.get('max_toys', 5000)
    signif = cfg.get('signif', 3.0)
    cl = cfg.get('CL', 0.95)
    verbose = cfg.get('verbose', False)
    make_plots = cfg.get('make_plots', False)
    # Write CLs values into the output even if current toys do not pass validation
    incomplete = cfg.get('output_incomplete', False)
    outfile = cfg.get('output', 'hybrid_grid.root')
    # NB: blacklisting not yet implemented for this method

    # Have to merge some arguments from both the command line and the "opts" in the json file
    to_freeze = []
    to_set = []
    set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
    if set_opt is not None:
        to_set.append(set_opt)
    freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
    if freeze_opt is not None:
        to_freeze.append(freeze_opt)
    if hasattr(self.args, 'setPhysicsModelParameters') and \
            self.args.setPhysicsModelParameters is not None:
        to_set.append(self.args.setPhysicsModelParameters)
    if hasattr(self.args, 'freezeNuisances') and \
            self.args.freezeNuisances is not None:
        to_freeze.append(self.args.freezeNuisances)

    points = []
    blacklisted_points = []
    for igrid in grids:
        assert len(igrid) == 3
        if igrid[2] == '':
            points.extend(itertools.product(utils.split_vals(igrid[0]),
                                            utils.split_vals(igrid[1])))
        else:
            blacklisted_points.extend(
                itertools.product(utils.split_vals(igrid[0]),
                                  utils.split_vals(igrid[1]),
                                  utils.split_vals(igrid[2])))

    # This dictionary will keep track of the combine output files for each model point
    file_dict = {}
    for p in points:
        file_dict[p] = {}

    # The regex we will use to identify output files and extract POI values
    rgx = re.compile(
        r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
        % (POIs[0], POIs[1]))

    # Can optionally copy output root files into a zip archive
    # If the user has specified a zipfile we will first
    # look for output files in this archive before scanning the
    # current directory
    if zipname:
        # Open the zip file in append mode, this should also
        # create it if it doesn't exist
        zipf = zipfile.ZipFile(zipname, 'a')
        for f in zipf.namelist():
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                if seed not in file_dict[p]:
                    # For each model point have a dictionary keyed on the seed number
                    # with a value pointing to the file in the archive in the format
                    # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                    file_dict[p][seed] = zipname + '#' + f

    # Now look for files in the local directory
    for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root'
                       % (POIs[0], POIs[1])):
        matches = rgx.search(f)
        p = (matches.group('p1'), matches.group('p2'))
        seed = int(matches.group('toy'))
        if p in file_dict:
            # Don't add this file to the list if its seed number is already
            # a value in the dict.
            if seed not in file_dict[p]:
                # If we're using the zipfile we'll add this now and
                # then delete it from the local directory.
                # But only if the file is good; we don't want to pollute the
                # zip file with incomplete or failed jobs
                if zipname and plot.TFileIsGood(f):
                    zipf.write(f)  # assume this throws if it fails
                    print 'Adding %s to %s' % (f, zipname)
                    file_dict[p][seed] = zipname + '#' + f
                    os.remove(f)
                else:
                    # otherwise just add the file to the dict in the normal way
                    file_dict[p][seed] = f

    if zipname:
        zipf.close()

    # These lists will keep track of the CLs values which we will use
    # to create the output TGraph2Ds
    output_x = []
    output_y = []
    output_data = {}
    output_ntoys = []
    output_clserr = {}
    output_signif = {}
    # One list of Z-values per contour
    for contour in contours:
        output_data[contour] = []
        output_clserr[contour] = []
        output_signif[contour] = []

    # Also keep track of the number of model points which have met the
    # CLs criteria
    total_points = 0
    complete_points = 0

    for key, val in file_dict.iteritems():
        total_points += 1
        name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
        files = [x for x in val.values() if plot.TFileIsGood(x)]
        # Merge the HypoTestResult objects from each file into one
        res = self.GetCombinedHypoTest(files)

        # Do the validation of this model point
        ok, point_res = self.ValidateHypoTest(
            res,
            min_toys=min_toys,
            max_toys=max_toys,
            contours=contours,
            signif=signif,
            cl=cl,
            output=self.args.output,
            verbose=verbose) if res is not None else (False, {"ntoys": 0})

        print '>> Point %s [%i toys, %s]' % (
            name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')
        if ok:
            complete_points += 1

        # Make plots of the test statistic distributions if requested
        if res is not None and make_plots:
            self.PlotTestStat(res, 'plot_' + name,
                              opts=cfg['plot_settings'],
                              poi_vals=(float(key[0]), float(key[1])))

        # Add the resulting CLs values to the output arrays. Normally just
        # for the model points that passed the validation criteria, but if
        # "output_incomplete" has been set to true then we'll write all model
        # points where at least one HypoTestResult is present
        if res is not None and (ok or incomplete) and self.args.output:
            output_x.append(float(key[0]))
            output_y.append(float(key[1]))
            output_ntoys.append(point_res['ntoys'])
            for contour in contours:
                output_data[contour].append(point_res[contour][0])
                output_clserr[contour].append(point_res[contour][1])
                output_signif[contour].append(point_res[contour][2])

        # Do the job cycle generation if requested
        if not ok and self.args.cycles > 0:
            print '>>> Going to generate %i job(s) for point %s' % (
                self.args.cycles, key)
            # Figure out the next seed numbers we need to run by finding the
            # maximum seed number so far
            done_cycles = val.keys()
            new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1
            new_cycles = range(new_idx, new_idx + self.args.cycles)
            print '>>> Done cycles: ' + ','.join(str(x) for x in done_cycles)
            print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

            # Build the combine command.
            # Here we'll take responsibility for setting the name and the
            # model parameters, making sure the latter are frozen
            set_arg = ','.join(
                ['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
            freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
            point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                name, set_arg, freeze_arg)
            # Build a command for each job cycle setting the number of toys
            # and random seed and passing through any other user options from
            # the config file or the command line
            for idx in new_cycles:
                cmd = ' '.join([
                    'combine -M HybridNew', opts, point_args,
                    '-T %i' % toys_per_cycle,
                    '-s %i' % idx
                ] + self.passthru)
                self.job_queue.append(cmd)

    print ">> %i/%i points have completed and require no further toys" % (
        complete_points, total_points)
    self.flush_queue()

    # Create and write output CLs TGraph2Ds here
    # TODO: add graphs with the CLs errors, the numbers of toys and whether
    # or not the point passes
    if self.args.output:
        fout = ROOT.TFile(outfile, 'RECREATE')
        for c in contours:
            graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_data[c]))
            graph.SetName(c)
            fout.WriteTObject(graph, c)
            # Also write a Graph with the CLsErr
            graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_clserr[c]))
            graph.SetName('clsErr_' + c)
            fout.WriteTObject(graph, 'clsErr_' + c)
            # And a Graph with the significance
            graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_signif[c]))
            graph.SetName('signif_' + c)
            fout.WriteTObject(graph, 'signif_' + c)
        # And one Graph with the number of toys per point (contour-independent)
        graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x),
                              array('d', output_y), array('d', output_ntoys))
        graph.SetName('ntoys')
        fout.WriteTObject(graph, 'ntoys')
        fout.Close()
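
# Sketch of the output-file regex defined above, applied to a typical
# HybridNew filename. The POI names, point values and seed are illustrative:
import re

_POIs = ['r_ggH', 'r_bbH']
_rgx = re.compile(
    r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
    % (_POIs[0], _POIs[1]))
_m = _rgx.search('higgsCombine.r_ggH.1.5.r_bbH.0.2.HybridNew.mH120.12345.root')
# -> point ('1.5', '0.2') produced with seed 12345
print('%s %s %s' % (_m.group('p1'), _m.group('p2'), _m.group('toy')))
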
def run_method(self):
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch(ROOT.kTRUE)

    # Open the json config file
    with open(self.args.config) as json_file:
        cfg = json.load(json_file)

    # Set all the parameter values locally using defaults if necessary
    grids = cfg['grids']
    grids_to_remove = cfg.get('grids_to_remove', None)
    POIs = cfg['POIs']
    opts = cfg['opts']
    toys_per_cycle = cfg['toys_per_cycle']
    zipname = cfg.get('zipfile', None)
    statfile = cfg.get('statusfile', None)
    contours = cfg.get('contours',
                       ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
    min_toys = cfg.get('min_toys', 500)
    max_toys = cfg.get('max_toys', 5000)
    signif = cfg.get('signif', 3.0)
    cl = cfg.get('CL', 0.95)
    verbose = cfg.get('verbose', False)
    make_plots = cfg.get('make_plots', False)
    # Write CLs values into the output even if current toys do not pass validation
    incomplete = cfg.get('output_incomplete', False)
    outfile = cfg.get('output', 'hybrid_grid.root')
    from_asymptotic_settings = cfg.get('from_asymptotic_settings', dict())
    # NB: blacklisting not yet implemented for this method

    # Have to merge some arguments from both the command line and the "opts" in the json file
    to_freeze = []
    to_set = []
    set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
    if set_opt is not None:
        to_set.append(set_opt)
    freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
    if freeze_opt is not None:
        to_freeze.append(freeze_opt)
    if hasattr(self.args, 'setPhysicsModelParameters') and \
            self.args.setPhysicsModelParameters is not None:
        to_set.append(self.args.setPhysicsModelParameters)
    if hasattr(self.args, 'freezeNuisances') and \
            self.args.freezeNuisances is not None:
        to_freeze.append(self.args.freezeNuisances)

    points = []
    blacklisted_points = []

    # For the automatic grid of the "from_asymptotic" option we should fix the
    # format specifier for the grid points, as the numerical precision of a
    # given point may change once the grid spacing is modified. By default we
    # let split_vals do its thing, however.
    fmt_spec = None

    # In this mode we're doing a classic limit search vs MH instead of a 2D
    # grid. Most of the same code can be used however. First we'll use the
    # json file containing the asymptotic limits to create a new grid from
    # scratch.
    if self.args.from_asymptotic is not None:
        grids = []
        bound_vals = None
        bound_pars = []
        fmt_spec = '%.5g'
        with open(self.args.from_asymptotic) as limit_json:
            limits = json.load(limit_json)
        for m in limits.keys():
            limit_vals = [x for x in limits[m].values()]
            max_limit = max(limit_vals)
            min_limit = min(limit_vals)
            width = max_limit - min_limit
            max_limit += width * 0.3
            min_limit = max(0.0, min_limit - width * 0.3)
            nsteps = from_asymptotic_settings.get('points', 100)
            step_width = (max_limit - min_limit) / nsteps
            grids.append(
                [m, '%g:%g|%g' % (min_limit, max_limit, step_width), ''])
        boundlist_file = from_asymptotic_settings.get('boundlist', '')
        if boundlist_file:
            with open(boundlist_file) as json_file:
                bnd = json.load(json_file)
            bound_pars = list(bnd.keys())
            print 'Found bounds for parameters %s' % ','.join(bound_pars)
            bound_vals = {}
            for par in bound_pars:
                bound_vals[par] = list()
                for mass, bounds in bnd[par].iteritems():
                    bound_vals[par].append((float(mass), bounds[0], bounds[1]))
                bound_vals[par].sort(key=lambda x: x[0])

    for igrid in grids:
        assert len(igrid) == 3
        if igrid[2] == '':
            points.extend(itertools.product(
                utils.split_vals(igrid[0], fmt_spec=fmt_spec),
                utils.split_vals(igrid[1], fmt_spec=fmt_spec)))
        else:
            blacklisted_points.extend(
                itertools.product(utils.split_vals(igrid[0]),
                                  utils.split_vals(igrid[1]),
                                  utils.split_vals(igrid[2])))

    # In between cycles of toys we may find there's something wrong with some
    # of the points in the grid and therefore want to remove them:
    points_to_remove = []
    if grids_to_remove is not None:
        for igrid in grids_to_remove:
            assert len(igrid) == 2
            points_to_remove.extend(
                itertools.product(utils.split_vals(igrid[0]),
                                  utils.split_vals(igrid[1])))
    for p in points_to_remove:
        points.remove(p)

    # This dictionary will keep track of the combine output files for each model point
    file_dict = {}
    for p in points:
        file_dict[p] = {}

    # The regex we will use to identify output files and extract POI values
    rgx = re.compile(
        r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
        % (POIs[0], POIs[1]))

    stats = {}
    if statfile and os.path.isfile(statfile):
        with open(statfile) as stat_json:
            stats = json.load(stat_json)

    # Can optionally copy output root files into a zip archive
    # If the user has specified a zipfile we will first
    # look for output files in this archive before scanning the
    # current directory
    if zipname:
        # Open the zip file in append mode, this should also
        # create it if it doesn't exist
        zipf = zipfile.ZipFile(zipname, 'a')
        for f in zipf.namelist():
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                if seed not in file_dict[p]:
                    # For each model point have a dictionary keyed on the seed number
                    # with a value pointing to the file in the archive in the format
                    # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                    file_dict[p][seed] = zipname + '#' + f

    # Now look for files in the local directory
    for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root'
                       % (POIs[0], POIs[1])):
        matches = rgx.search(f)
        p = (matches.group('p1'), matches.group('p2'))
        seed = int(matches.group('toy'))
        if p in file_dict:
            # Don't add this file to the list if its seed number is already
            # a value in the dict.
            if seed not in file_dict[p]:
                # If we're using the zipfile we'll add this now and
                # then delete it from the local directory.
                # But only if the file is good; we don't want to pollute the
                # zip file with incomplete or failed jobs
                if zipname and plot.TFileIsGood(f):
                    zipf.write(f)  # assume this throws if it fails
                    print 'Adding %s to %s' % (f, zipname)
                    file_dict[p][seed] = zipname + '#' + f
                    os.remove(f)
                else:
                    # otherwise just add the file to the dict in the normal way
                    file_dict[p][seed] = f

    if zipname:
        zipf.close()

    # These lists will keep track of the CLs values which we will use
    # to create the output TGraph2Ds
    output_x = []
    output_y = []
    output_data = {}
    output_ntoys = []
    output_clserr = {}
    output_signif = {}
    # One list of Z-values per contour
    for contour in contours:
        output_data[contour] = []
        output_clserr[contour] = []
        output_signif[contour] = []

    # Also keep track of the number of model points which have met the
    # CLs criteria
    total_points = 0
    complete_points = 0

    for key, val in file_dict.iteritems():
        status_changed = True
        total_points += 1
        status_key = ':'.join(key)
        name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])

        # First check if we use the status json
        all_files = val.values()
        status_files = []
        files = []

        if status_key in stats:
            status_files = stats[status_key]['files']
            if set(all_files) == set(status_files):
                print 'For point %s, no files have been updated' % name
                status_changed = False
                files = all_files
            else:
                files = [x for x in val.values() if plot.TFileIsGood(x)]
                if set(files) == set(status_files) and len(files) < len(all_files):
                    print 'For point %s, new files exist but they are not declared good' % name
                    status_changed = False
        else:
            files = [x for x in val.values() if plot.TFileIsGood(x)]

        # Merge the HypoTestResult objects from each file into one
        res = None
        precomputed = None
        if status_key in stats and not status_changed and stats[status_key]["ntoys"] > 0:
            precomputed = stats[status_key]
        else:
            res = self.GetCombinedHypoTest(files)

        # Do the validation of this model point
        ok, point_res = self.ValidateHypoTest(
            res,
            min_toys=min_toys,
            max_toys=max_toys,
            contours=contours,
            signif=signif,
            cl=cl,
            output=self.args.output,
            verbose=verbose,
            precomputed=precomputed)

        print '>> Point %s [%i toys, %s]' % (
            name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

        stats[status_key] = {'files': files, 'ntoys': point_res['ntoys']}
        for cont in contours:
            if cont in point_res:
                stats[status_key][cont] = point_res[cont]

        if ok:
            complete_points += 1

        # Make plots of the test statistic distributions if requested
        if res is not None and make_plots:
            self.PlotTestStat(res, 'plot_' + name,
                              opts=cfg['plot_settings'],
                              poi_vals=(float(key[0]), float(key[1])),
                              point_info=point_res)

        # Add the resulting CLs values to the output arrays.
        # Normally just for the model points that passed the validation
        # criteria, but if "output_incomplete" has been set to true then we'll
        # write all model points where at least one HypoTestResult is present
        if (res is not None or precomputed is not None) and \
                (ok or incomplete) and self.args.output:
            output_x.append(float(key[0]))
            output_y.append(float(key[1]))
            output_ntoys.append(point_res['ntoys'])
            for contour in contours:
                output_data[contour].append(point_res[contour][0])
                output_clserr[contour].append(point_res[contour][1])
                output_signif[contour].append(point_res[contour][2])

        # Do the job cycle generation if requested
        if not ok and self.args.cycles > 0:
            print '>>> Going to generate %i job(s) for point %s' % (
                self.args.cycles, key)
            # Figure out the next seed numbers we need to run by finding the
            # maximum seed number so far
            done_cycles = val.keys()
            new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1
            new_cycles = range(new_idx, new_idx + self.args.cycles)
            print '>>> Done cycles: ' + ','.join(str(x) for x in done_cycles)
            print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

            # Build the combine command. Here we'll take responsibility for
            # setting the name and the model parameters, making sure the
            # latter are frozen
            set_arg = ','.join(
                ['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
            freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
            point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                name, set_arg, freeze_arg)

            if self.args.from_asymptotic:
                mval = key[0]
                command = []
                for par in bound_pars:
                    # The (mass, None, None) is just a trick to make bisect_left
                    # do the comparison with the list of tuples in bound_vals[par].
                    # The +1E-5 is to avoid float rounding issues
                    lower_bound = bisect.bisect_left(
                        bound_vals[par], (float(mval) + 1E-5, None, None))
                    # If lower_bound == 0 this means we are at or below the
                    # lowest mass point, in which case we should increase by one
                    # to take the bounds from this lowest point
                    if lower_bound == 0:
                        lower_bound += 1
                    command.append('%s=%g,%g' % (
                        par,
                        bound_vals[par][lower_bound - 1][1],
                        bound_vals[par][lower_bound - 1][2]))
                if len(command) > 0:
                    point_args += (' --setPhysicsModelParameterRanges %s'
                                   % (':'.join(command)))
                point_args += ' --singlePoint %s' % key[1]
                point_args += ' -m %s' % mval

            # Build a command for each job cycle setting the number of toys
            # and random seed and passing through any other user options from
            # the config file or the command line
            for idx in new_cycles:
                cmd = ' '.join([
                    'combine -M HybridNew', opts, point_args,
                    '-T %i' % toys_per_cycle,
                    '-s %i' % idx
                ] + self.passthru)
                self.job_queue.append(cmd)

    print ">> %i/%i points have completed and require no further toys" % (
        complete_points, total_points)
    self.flush_queue()

    # Create and write output CLs TGraph2Ds here
    # TODO: add graphs with the CLs errors, the numbers of toys and whether
    # or not the point passes
    if self.args.output and not self.args.from_asymptotic:
        fout = ROOT.TFile(outfile, 'RECREATE')
        for c in contours:
            graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_data[c]))
            graph.SetName(c)
            fout.WriteTObject(graph, c)
            # Also write a Graph with the CLsErr
            graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_clserr[c]))
            graph.SetName('clsErr_' + c)
            fout.WriteTObject(graph, 'clsErr_' + c)
            # And a Graph with the significance
            graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_signif[c]))
            graph.SetName('signif_' + c)
            fout.WriteTObject(graph, 'signif_' + c)
        # And one Graph with the number of toys per point (contour-independent)
        graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x),
                              array('d', output_y), array('d', output_ntoys))
        graph.SetName('ntoys')
        fout.WriteTObject(graph, 'ntoys')
        fout.Close()

    if self.args.output and self.args.from_asymptotic:
        # Need to collect all the files for each mass point and hadd them:
        files_by_mass = {}
        for key, val in file_dict.iteritems():
            if key[0] not in files_by_mass:
                files_by_mass[key[0]] = list()
            files_by_mass[key[0]].extend(val.values())
        for m, files in files_by_mass.iteritems():
            gridfile = 'higgsCombine.gridfile.%s.%s.%s.root' % (
                POIs[0], m, POIs[1])
            self.job_queue.append('hadd -f %s %s' % (gridfile, ' '.join(files)))
            for exp in ['', '0.025', '0.160', '0.500', '0.840', '0.975']:
                self.job_queue.append(' '.join([
                    'combine -M HybridNew --rAbsAcc 0', opts,
                    '--grid %s' % gridfile,
                    '-n .final.%s.%s.%s' % (POIs[0], m, POIs[1]),
                    '-m %s' % (m),
                    ('--expectedFromGrid %s' % exp) if exp else '--noUpdateGrid'
                ] + self.passthru))
        self.flush_queue()

    if statfile:
        with open(statfile, 'w') as stat_out:
            stat_json = json.dumps(stats, sort_keys=True, indent=2,
                                   separators=(',', ': '))
            stat_out.write(stat_json)
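
# The (mass, None, None) trick in the bound lookup of the from_asymptotic
# branch above relies on Python comparing tuples element-wise, so bisect_left
# can search a list of (mass, lo, hi) tuples on the mass alone; the +1E-5
# guarantees the mass elements never compare equal, so the None entries are
# never reached. A standalone sketch with illustrative bounds:
import bisect

_bound_vals = [(90.0, 0.0, 20.0), (125.0, 0.0, 10.0), (300.0, 0.0, 5.0)]


def _bounds_for(mval):
    lower_bound = bisect.bisect_left(_bound_vals,
                                     (float(mval) + 1E-5, None, None))
    if lower_bound == 0:
        lower_bound += 1  # at or below the lowest mass: use the first entry
    return _bound_vals[lower_bound - 1]


print(_bounds_for(125))  # -> (125.0, 0.0, 10.0)
print(_bounds_for(80))   # -> (90.0, 0.0, 20.0)
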