Example #1
0
def read(scan, param, files, chop, remove_near_min, rezero,
         remove_delta=None, improve=False, remove_dups=True):
    """Build a cleaned ``2*DELTANLL`` scan graph of `param` from ROOT files.

    Files rejected by ``plot.TFileIsGood`` are dropped, the rest are chained
    on their 'limit' tree, and a graph of ``2*DELTANLL`` against `param` is
    extracted for entries with ``quantileExpected > -1.5``.  The graph is
    named `scan`, sorted, and then handed to the ``plot`` clean-up helpers in
    a fixed order.

    Args:
        scan: Name assigned to the returned graph.
        param: Tree expression used for the x values.
        files: Iterable of candidate ROOT file paths.
        chop: Argument forwarded to ``plot.RemoveGraphYAbove``.
        remove_near_min: Argument forwarded to ``plot.RemoveNearMin``; the
            step is skipped when this is None.
        rezero: Argument forwarded to ``plot.ReZeroTGraph``.
        remove_delta: Argument forwarded to ``plot.RemoveSmallDelta``; the
            step is skipped when this is None.
        improve: When true, a TSpline3 through the graph is wrapped in a TF1
            and passed to ``plot.ImproveMinimum``.
        remove_dups: When true, ``plot.RemoveGraphXDuplicates`` is applied.

    Returns:
        The processed graph object.

    The module-level FILTER and REMOVE_X_RANGES settings are applied last
    when they are not None.  NAMECOUNTER is incremented for every TF1 made.
    """
    global NAMECOUNTER

    usable = [path for path in files if plot.TFileIsGood(path)]
    chain = plot.MakeTChain(usable, 'limit')
    y_expr = '2*%s' % DELTANLL
    graph = plot.TGraphFromTree(chain, param, y_expr,
                                'quantileExpected > -1.5')
    graph.SetName(scan)
    graph.Sort()

    # Clean-up steps; the order matters because each one edits the graph
    # in place before the next one sees it.
    if remove_dups:
        plot.RemoveGraphXDuplicates(graph)
    if remove_delta is not None:
        plot.RemoveSmallDelta(graph, remove_delta)
    plot.RemoveGraphYAbove(graph, chop)
    plot.ReZeroTGraph(graph, rezero)
    if remove_near_min is not None:
        plot.RemoveNearMin(graph, remove_near_min)

    if improve:
        spline = ROOT.TSpline3("spline3", graph)
        # The counter gives every TF1 a distinct name.
        fn_name = 'splinefn' + str(NAMECOUNTER)
        x_first = graph.GetX()[0]
        x_last = graph.GetX()[graph.GetN() - 1]
        func = ROOT.TF1(fn_name, partial(Eval, spline), x_first, x_last, 1)
        func.SetNpx(NPX)
        NAMECOUNTER += 1
        plot.ImproveMinimum(graph, func, True)

    if FILTER is not None:
        plot.FilterGraph(graph, FILTER)
    if REMOVE_X_RANGES is not None:
        for x_range in REMOVE_X_RANGES:
            plot.RemoveInXRange(graph, x_range[0], x_range[1])
    return graph
Example #2
0
def read(scan,
         param,
         files,
         chop,
         remove_near_min,
         rezero,
         remove_delta=None,
         improve=False):
    """Build a cleaned ``2*deltaNLL`` scan graph of `param` from ROOT files.

    Files rejected by ``plot.TFileIsGood`` are dropped and the rest are
    chained on their 'limit' tree.  The graph is named `scan`, sorted,
    stripped of duplicate x values and then passed through the remaining
    ``plot`` clean-up helpers in a fixed order.

    Args:
        scan: Name assigned to the returned graph.
        param: Tree expression used for the x values.
        files: Iterable of candidate ROOT file paths.
        chop: Argument forwarded to ``plot.RemoveGraphYAbove``.
        remove_near_min: Argument forwarded to ``plot.RemoveNearMin``; the
            step is skipped when this is None.
        rezero: Argument forwarded to ``plot.ReZeroTGraph``.
        remove_delta: Argument forwarded to ``plot.RemoveSmallDelta``; the
            step is skipped when this is None.
        improve: When true, a TSpline3 through the graph is wrapped in a TF1
            and passed to ``plot.ImproveMinimum``.

    Returns:
        The processed graph object.
    """
    global NAMECOUNTER

    usable = [path for path in files if plot.TFileIsGood(path)]
    chain = plot.MakeTChain(usable, 'limit')
    # Require quantileExpected > -0.5 to avoid the final point, which is
    # always committed twice (even if the fit fails).
    selection = 'quantileExpected > -0.5'
    graph = plot.TGraphFromTree(chain, param, '2*deltaNLL', selection)
    graph.SetName(scan)
    graph.Sort()
    plot.RemoveGraphXDuplicates(graph)

    if remove_delta is not None:
        plot.RemoveSmallDelta(graph, remove_delta)
    plot.RemoveGraphYAbove(graph, chop)
    plot.ReZeroTGraph(graph, rezero)
    if remove_near_min is not None:
        plot.RemoveNearMin(graph, remove_near_min)

    if improve:
        spline = ROOT.TSpline3("spline3", graph)
        # The counter gives every TF1 a distinct name.
        fn_name = 'splinefn' + str(NAMECOUNTER)
        x_first = graph.GetX()[0]
        x_last = graph.GetX()[graph.GetN() - 1]
        func = ROOT.TF1(fn_name, partial(Eval, spline), x_first, x_last, 1)
        NAMECOUNTER += 1
        plot.ImproveMinimum(graph, func, True)
    return graph
Example #3
0
def read(scan, param, files, ycut):
    """Return a sorted ``2*deltaNLL`` graph of `param` built from `files`.

    Files rejected by ``plot.TFileIsGood`` are ignored.  The remaining ones
    are chained on their 'limit' tree, entries with
    ``quantileExpected > -1.5`` are plotted, and the graph is named `scan`.
    After sorting, duplicate x values are removed and `ycut` is forwarded to
    ``plot.RemoveGraphYAbove``.
    """
    usable = [path for path in files if plot.TFileIsGood(path)]
    chain = plot.MakeTChain(usable, 'limit')
    scan_graph = plot.TGraphFromTree(
        chain, param, '2*deltaNLL', 'quantileExpected > -1.5')
    scan_graph.SetName(scan)
    scan_graph.Sort()
    plot.RemoveGraphXDuplicates(scan_graph)
    plot.RemoveGraphYAbove(scan_graph, ycut)
    return scan_graph
Example #4
0
    def run_method(self):
        """Collect observed/expected limits from ROOT files into JSON.

        Every path in ``self.args.input`` that passes ``plot.TFileIsGood`` is
        assigned to a label: 'default' when ``self.args.use_dirs`` is False,
        otherwise a directory component of the path.  For each label the
        'limit' tree of every file is read and the limit of each entry is
        stored under its mass (``str(evt.mh)``) with a key chosen from
        ``quantileExpected`` ('obs', 'exp0', 'exp-2', 'exp-1', 'exp+1',
        'exp+2').  If ``self.args.output`` is set, the result is written as
        JSON, with ``_<label>`` inserted before '.json' when directory
        labels are in use.

        Files that fail the check are reported and skipped, matching the
        message that is printed.
        """
        # quantileExpected value -> output key, tested in this order.
        expected_keys = [(0.5, 'exp0'), (0.025, 'exp-2'), (0.160, 'exp-1'),
                         (0.840, 'exp+1'), (0.975, 'exp+2')]

        limit_sets = defaultdict(list)
        for filename in self.args.input:
            if not plot.TFileIsGood(filename):
                print('>> File %s is corrupt or incomplete, skipping' % filename)
                # Really skip it: without this the bad file would still be
                # queued and opened below.
                continue
            label = 'default'
            if self.args.use_dirs is not False:
                dirs = filename.split('/')
                # The last dir could be the mass, if so we ignore it and
                # check the next
                if len(dirs) > 1:
                    if not isfloat(dirs[-2]):
                        label = dirs[-2]
                    elif len(dirs) > 2:
                        label = dirs[-3]
            limit_sets[label].append(filename)

        for label, filenames in limit_sets.items():
            js_out = {}
            for filename in filenames:
                root_file = ROOT.TFile(filename)
                tree = root_file.Get('limit')
                for evt in tree:
                    point = js_out.setdefault(str(evt.mh), {})
                    quantile = evt.quantileExpected
                    if quantile == -1:
                        point['obs'] = evt.limit
                        continue
                    for value, key in expected_keys:
                        if abs(quantile - value) < 1E-4:
                            point[key] = evt.limit
                            break

            jsondata = json.dumps(js_out,
                                  sort_keys=True,
                                  indent=2,
                                  separators=(',', ': '))
            if self.args.output is not None:
                if self.args.use_dirs:
                    outname = self.args.output.replace(
                        '.json', '_%s.json' % label)
                else:
                    outname = self.args.output
                with open(outname, 'w') as out_file:
                    print('>> Writing output %s from files:' % outname)
                    pprint.pprint(filenames, indent=2)
                    out_file.write(jsondata)
Example #5
0
def read(scan, param_x, param_y, file):
    """Read a 2D likelihood scan and its best-fit point from one ROOT file.

    The file is used only if ``plot.TFileIsGood`` accepts it.  Two objects
    are built from its 'limit' tree:

    * a 2D graph of ``2*deltaNLL`` over (`param_x`, `param_y`) for entries
      with ``quantileExpected > -0.5 && deltaNLL > 0``, named `scan`;
    * a graph of `param_y` against `param_x` for entries with
      ``quantileExpected > -0.5 && deltaNLL == 0``, named ``scan + '_best'``.

    After duplicate x values are removed, the second graph is asserted to
    contain exactly one point.

    Returns:
        Tuple ``(graph, best)``.
    """
    usable = [path for path in [file] if plot.TFileIsGood(path)]
    chain = plot.MakeTChain(usable, 'limit')

    scan_cut = 'quantileExpected > -0.5 && deltaNLL > 0'
    best_cut = 'quantileExpected > -0.5 && deltaNLL == 0'
    graph = plot.TGraph2DFromTree(chain, param_x, param_y, '2*deltaNLL',
                                  scan_cut)
    best = plot.TGraphFromTree(chain, param_x, param_y, best_cut)

    plot.RemoveGraphXDuplicates(best)
    assert best.GetN() == 1
    graph.SetName(scan)
    best.SetName(scan + '_best')
    return (graph, best)
Example #6
0
def read(scan, param, other_param, files, remove_dups=True):
    """Return a sorted graph of `other_param` against `param` from `files`.

    Files rejected by ``plot.TFileIsGood`` are ignored.  The rest are
    chained on their 'limit' tree and entries with
    ``quantileExpected > -0.5`` are plotted.  The graph is named `scan` and
    sorted.  Duplicate x values are removed unless `remove_dups` is false.
    """
    usable = [path for path in files if plot.TFileIsGood(path)]
    chain = plot.MakeTChain(usable, 'limit')
    selection = 'quantileExpected > -0.5'
    result = plot.TGraphFromTree(chain, param, other_param, selection)
    result.SetName(scan)
    result.Sort()
    if remove_dups:
        plot.RemoveGraphXDuplicates(result)
    return result
Example #7
0
 def ReadScanFromTFiles(self,
                        filenames,
                        param_name,
                        tree_selection='quantileExpected > -1.5'):
     """Return a sorted ``2*deltaNLL`` graph of `param_name` from ROOT files.

     Args:
         filenames: Iterable of candidate ROOT file paths; those rejected by
             ``plotting.TFileIsGood`` are silently dropped.
         param_name: Tree expression used for the x values.
         tree_selection: Selection string applied to the 'limit' tree.

     Returns:
         The sorted graph.  It is also printed when ``self.verbosity`` is
         at least 2.

     Raises:
         RuntimeError: If none of the files is usable.
     """
     # TODO: should report bad files here
     usable = [path for path in filenames if plotting.TFileIsGood(path)]
     if not usable:
         raise RuntimeError('[ReadScanFromTFiles] no valid TFiles')
     chain = plotting.MakeTChain(usable, 'limit')
     scan_graph = plotting.TGraphFromTree(chain, param_name, '2*deltaNLL',
                                          tree_selection)
     scan_graph.Sort()
     if self.verbosity >= 2:
         print('[ReadScanFromTFiles] Produced TGraph:')
         scan_graph.Print()
     return scan_graph
Example #8
0
    def run_method(self):
        """Collect observed and toy test-statistic values and p-values.

        Every path in ``self.args.input`` that passes ``plot.TFileIsGood`` is
        assigned to a label: 'default' unless ``self.args.use_dirs`` is set,
        in which case a directory component of the path is used.  For each
        label the 'limit' tree of every file is read, keeping only entries
        with ``quantileExpected == -1``.  Entries with ``iToy <= 0`` are
        stored as the observation, all others as toys, keyed by
        ``str(evt.mh)``.

        If the tree has branches after 'quantileExpected', each of them is
        treated as a separate category and its values are stored under the
        branch name; otherwise ``evt.limit`` is stored directly under the
        mass.  A p-value (fraction of toys >= observation) is added as "p"
        wherever both an observation and at least one toy exist.  Entries
        for which it cannot be computed are reported and left without "p"
        instead of aborting the run.

        If ``self.args.output`` is set, the result is written as JSON, with
        ``_<label>`` inserted before '.json' when directory labels are used.
        """

        def p_value(entry):
            """Return the fraction of toys >= the observation, or None."""
            toys = entry.get('toy')
            obs = entry.get('obs')
            if not toys or not obs:
                return None
            n_above = len([toy for toy in toys if toy >= obs[0]])
            return float(n_above) / len(toys)

        # label -> (result files, toy files); only the first list is filled
        # in this method.
        limit_sets = {}
        for filename in self.args.input:
            if not plot.TFileIsGood(filename):
                print('>> File %s is corrupt or incomplete, skipping' % filename)
                continue
            label = 'default'
            if self.args.use_dirs:
                dirs = filename.split('/')
                # The last dir could be the mass, if so we ignore it and
                # check the next
                if len(dirs) > 1:
                    if not isfloat(dirs[-2]):
                        label = dirs[-2]
                    elif len(dirs) > 2:
                        label = dirs[-3]
            if label not in limit_sets:
                limit_sets[label] = ([], [])
            limit_sets[label][0].append(filename)

        for label, (filenames, toyfiles) in limit_sets.items():
            js_out = {}
            for filename in filenames:
                root_file = ROOT.TFile(filename)
                tree = root_file.Get('limit')
                # Current logic says any branch after quantileExpected is a
                # special GOF branch labelled according to category
                names = [b.GetName() for b in tree.GetListOfBranches()]
                if 'quantileExpected' in names:
                    branches = names[names.index('quantileExpected') + 1:]
                else:
                    branches = []

                for evt in tree:
                    mh = str(evt.mh)
                    if mh not in js_out:
                        js_out[mh] = {}
                    if evt.quantileExpected != -1:
                        continue
                    if branches:
                        for branch in branches:
                            entry = js_out[mh].setdefault(branch, {'toy': []})
                            value = getattr(evt, branch)
                            if evt.iToy <= 0:
                                entry['obs'] = [value]
                            else:
                                entry['toy'].append(value)
                    else:
                        point = js_out[mh]
                        point.setdefault('toy', [])
                        if evt.iToy <= 0:
                            point['obs'] = [evt.limit]
                        else:
                            point['toy'].append(evt.limit)

            for mh in js_out:
                point = js_out[mh]
                if isinstance(point.get('toy'), list):
                    # Flat layout: values live directly under the mass.
                    targets = [(mh, point)]
                else:
                    # Per-category layout: one sub-dict per branch.  Build
                    # the list first so adding "p" cannot disturb iteration.
                    targets = [('%s/%s' % (mh, branch), point[branch])
                               for branch in point]
                for name, entry in targets:
                    pval = p_value(entry)
                    if pval is None:
                        print('>> Cannot compute p-value for %s: '
                              'missing observed value or toys' % name)
                    else:
                        entry["p"] = pval

            jsondata = json.dumps(js_out,
                                  sort_keys=True,
                                  indent=2,
                                  separators=(',', ': '))
            if self.args.output is not None:
                if self.args.use_dirs:
                    outname = self.args.output.replace(
                        '.json', '_%s.json' % label)
                else:
                    outname = self.args.output
                with open(outname, 'w') as out_file:
                    print('>> Writing output %s from files:' % outname)
                    pprint.pprint(filenames, indent=2)
                    out_file.write(jsondata)
Example #9
0
    def run_method(self):
        """Collect limits (optionally toy-based bands) from ROOT files.

        Every path in ``self.args.input`` that passes ``plot.TFileIsGood`` is
        assigned to a label: 'default' when ``self.args.use_dirs`` is False,
        otherwise a directory component of the path.  For each label the
        'limit' tree of every file is read and results are stored per mass
        (``str(evt.mh)``).

        Without ``self.args.toys`` each entry's limit is stored under the
        key selected by ``quantileExpected`` ('obs', 'exp0', 'exp-2',
        'exp-1', 'exp+1', 'exp+2'), plus a ``<key>_err`` value from
        ``evt.limitErr`` when ``self.args.limit_err`` is set.

        With ``self.args.toys`` entries with ``iToy > 0`` are appended to
        per-key lists under 'toys', and observed entries with ``iToy == 0``
        are appended to an 'obs' list.  The expected bands are then taken
        from the quantiles of the toy 'obs' limits via
        ``ROOT.TMath.Quantiles``.

        If ``self.args.output`` is set, the result is written as JSON, with
        ``_<label>`` inserted before '.json' when directory labels are used.
        """
        # quantileExpected value -> output key, tested in this order.
        expected_keys = [(0.5, 'exp0'), (0.025, 'exp-2'), (0.160, 'exp-1'),
                         (0.840, 'exp+1'), (0.975, 'exp+2')]
        toy_keys = ['obs', 'exp0', 'exp-2', 'exp-1', 'exp+1', 'exp+2']

        def limit_key(quantile):
            """Map a quantileExpected value to its output key, or None."""
            if quantile == -1:
                return 'obs'
            for value, key in expected_keys:
                if abs(quantile - value) < 1E-4:
                    return key
            return None

        limit_sets = defaultdict(list)
        for filename in self.args.input:
            if not plot.TFileIsGood(filename):
                print('>> File %s is corrupt or incomplete, skipping' % filename)
                continue
            label = 'default'
            if self.args.use_dirs is not False:
                dirs = filename.split('/')
                # The last dir could be the mass, if so we ignore it and
                # check the next
                if len(dirs) > 1:
                    if not isfloat(dirs[-2]):
                        label = dirs[-2]
                    elif len(dirs) > 2:
                        label = dirs[-3]
            limit_sets[label].append(filename)

        for label, filenames in limit_sets.items():
            js_out = {}
            # Every file in the set already passed TFileIsGood above, so it
            # is not checked a second time here.
            for filename in filenames:
                root_file = ROOT.TFile(filename)
                tree = root_file.Get('limit')
                for evt in tree:
                    mh = str(evt.mh)
                    if mh not in js_out:
                        js_out[mh] = {}
                        if self.args.toys:
                            js_out[mh]['toys'] = dict(
                                (key, []) for key in toy_keys)
                    key = limit_key(evt.quantileExpected)
                    if self.args.toys:
                        if evt.iToy > 0:
                            if key is not None:
                                js_out[mh]['toys'][key].append(evt.limit)
                        elif evt.iToy == 0 and key == 'obs':
                            # The 'obs' list is created on first use; it was
                            # never initialised before, so this append
                            # raised KeyError.
                            # NOTE(review): kept as a list to match the
                            # original append; confirm consumers do not
                            # expect a scalar here.
                            js_out[mh].setdefault('obs', []).append(evt.limit)
                    elif key is not None:
                        js_out[mh][key] = evt.limit
                        if self.args.limit_err:
                            js_out[mh][key + '_err'] = evt.limitErr

            if self.args.toys:
                for mh in js_out:
                    print("Expected bands will be taken from toys")
                    print(mh)
                    limits = sorted(js_out[mh]['toys']['obs'])
                    if not limits:
                        # Quantiles of an empty sample are undefined; leave
                        # the bands unset rather than call into ROOT with
                        # zero entries.
                        print('>> No toy limits found for %s, skipping' % mh)
                        continue
                    quantiles = array('d', [0.025, 0.160, 0.5, 0.840, 0.975])
                    res = array('d', [0., 0., 0., 0., 0.])
                    empty = array('i', [0])
                    ROOT.TMath.Quantiles(len(limits), len(quantiles),
                                         array('d', limits), res, quantiles,
                                         True, empty, 1)
                    print(res)
                    js_out[mh]['exp-2'] = res[0]
                    js_out[mh]['exp-1'] = res[1]
                    js_out[mh]['exp0'] = res[2]
                    js_out[mh]['exp+1'] = res[3]
                    js_out[mh]['exp+2'] = res[4]

            jsondata = json.dumps(js_out,
                                  sort_keys=True,
                                  indent=2,
                                  separators=(',', ': '))
            if self.args.output is not None:
                if self.args.use_dirs:
                    outname = self.args.output.replace(
                        '.json', '_%s.json' % label)
                else:
                    outname = self.args.output
                with open(outname, 'w') as out_file:
                    print('>> Writing output %s from files:' % outname)
                    pprint.pprint(filenames, indent=2)
                    out_file.write(jsondata)
Example #10
0
    def run_method(self):
        """Run one pass of the HybridNew toy grid defined by a JSON config.

        Steps, in order:

        1. Load ``self.args.config`` and read the grid definition, POI
           names, combine options and validation thresholds from it.
        2. Expand the grids into model points and locate the combine output
           files for each point, first in the optional zip archive and then
           in the current directory.  Good local files are moved into the
           archive when one is configured.
        3. For every point, merge the results with
           ``self.GetCombinedHypoTest`` and check them with
           ``self.ValidateHypoTest``.  Test-statistic plots are made if
           requested, and passing points (or all points with a result when
           'output_incomplete' is set) are recorded.
        4. For points that are not complete, append ``self.args.cycles`` new
           ``combine -M HybridNew`` commands to ``self.job_queue`` with fresh
           seed numbers, then call ``self.flush_queue``.
        5. If ``self.args.output`` is set, write TGraph2D objects for each
           contour (value, 'clsErr_<c>', 'signif_<c>') and one 'ntoys' graph
           to the configured output ROOT file.

        File names that do not match the expected combine pattern, or whose
        seed field is not an integer, are ignored.
        """
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids = cfg['grids']
        POIs = cfg['POIs']
        opts = cfg['opts']
        toys_per_cycle = cfg['toys_per_cycle']
        zipname = cfg.get('zipfile', None)
        contours = cfg.get('contours',
                           ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys = cfg.get('min_toys', 500)
        max_toys = cfg.get('max_toys', 5000)
        signif = cfg.get('signif', 3.0)
        cl = cfg.get('CL', 0.95)
        verbose = cfg.get('verbose', False)
        make_plots = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass
        # validation
        incomplete = cfg.get('output_incomplete', False)
        outfile = cfg.get('output', 'hybrid_grid.root')
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the
        # "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None:
            to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None:
            to_freeze.append(freeze_opt)
        if getattr(self.args, 'setPhysicsModelParameters', None) is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if getattr(self.args, 'freezeNuisances', None) is not None:
            to_freeze.append(self.args.freezeNuisances)

        points = []
        blacklisted_points = []
        for igrid in grids:
            assert (len(igrid) == 3)
            if igrid[2] == '':
                points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1])))
            else:
                blacklisted_points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1]),
                                      utils.split_vals(igrid[2])))

        # This dictionary will keep track of the combine output files for
        # each model point
        file_dict = {}
        for p in points:
            file_dict[p] = {}

        # The regex we will use to identify output files and extract POI
        # values (raw string so the backslashes reach the regex engine
        # untouched)
        rgx = re.compile(
            r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
            % (POIs[0], POIs[1]))

        def parse_output_name(fname):
            """Return ((p1, p2), seed) for a combine output name, or None."""
            matches = rgx.search(fname)
            if matches is None:
                return None
            try:
                seed = int(matches.group('toy'))
            except ValueError:
                return None
            return (matches.group('p1'), matches.group('p2')), seed

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory
        zipf = None
        if zipname:
            # Open the zip file in append mode, this should also
            # create it if it doesn't exist
            zipf = zipfile.ZipFile(zipname, 'a')
        try:
            if zipf is not None:
                for f in zipf.namelist():
                    parsed = parse_output_name(f)
                    if parsed is None:
                        # Not one of our output files; ignore it
                        continue
                    p, seed = parsed
                    if p in file_dict and seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on
                        # the seed number with a value pointing to the file
                        # in the archive in the format ROOT expects:
                        # "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname + '#' + f

            # Now look for files in the local directory
            for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' %
                               (POIs[0], POIs[1])):
                parsed = parse_output_name(f)
                if parsed is None:
                    # The glob is looser than the regex (e.g. it also
                    # accepts names without a seed field); skip those
                    continue
                p, seed = parsed
                # Don't add this file to the list if its seed number is
                # already a key in the dict.
                if p not in file_dict or seed in file_dict[p]:
                    continue
                # If we're using the zipfile we'll add this now and
                # then delete it from the local directory
                # But: only if the file is good, we don't want to pollute
                # the zip file with incomplete or failed jobs
                if zipname and plot.TFileIsGood(f):
                    zipf.write(f)  # assume this throws if it fails
                    print('Adding %s to %s' % (f, zipname))
                    file_dict[p][seed] = zipname + '#' + f
                    os.remove(f)
                else:
                    # otherwise just add the file to the dict in the normal
                    # way
                    file_dict[p][seed] = f
        finally:
            # Always finalise the archive: local copies of the files added
            # above have already been deleted.
            if zipf is not None:
                zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_ntoys = []
        # One list of Z-values per contour
        output_data = dict((contour, []) for contour in contours)
        output_clserr = dict((contour, []) for contour in contours)
        output_signif = dict((contour, []) for contour in contours)

        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key, val in file_dict.items():
            total_points += 1
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
            files = [x for x in val.values() if plot.TFileIsGood(x)]
            # Merge the HypoTestResult objects from each file into one
            res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point
            if res is not None:
                ok, point_res = self.ValidateHypoTest(
                    res,
                    min_toys=min_toys,
                    max_toys=max_toys,
                    contours=contours,
                    signif=signif,
                    cl=cl,
                    output=self.args.output,
                    verbose=verbose)
            else:
                ok, point_res = False, {"ntoys": 0}

            print('>> Point %s [%i toys, %s]' % (
                name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE'))

            if ok:
                complete_points += 1

            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res,
                                  'plot_' + name,
                                  opts=cfg['plot_settings'],
                                  poi_vals=(float(key[0]), float(key[1])))

            # Add the resulting CLs values to the output arrays. Normally
            # just for the model points that passed the validation criteria,
            # but if "output_incomplete" has been set to true then we'll
            # write all model points where at least one HypoTestResult is
            # present
            if res is not None and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])

            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print('>>> Going to generate %i job(s) for point %s' % (
                    self.args.cycles, key))
                # Figure out the next seed numbers we need to run by finding
                # the maximum seed number so far
                done_cycles = sorted(val)
                new_idx = max(done_cycles) + 1 if done_cycles else 1
                new_cycles = range(new_idx, new_idx + self.args.cycles)

                print('>>> Done cycles: ' + ','.join(
                    str(x) for x in done_cycles))
                print('>>> New cycles: ' + ','.join(
                    str(x) for x in new_cycles))

                # Build the combine command. Here we'll take responsibility
                # for setting the name and the model parameters, making sure
                # the latter are frozen
                set_arg = ','.join(
                    ['%s=%s,%s=%s' %
                     (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] +
                                      to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                    name, set_arg, freeze_arg)
                # Build a command for each job cycle setting the number of
                # toys and random seed and passing through any other user
                # options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join([
                        'combine -M HybridNew', opts, point_args,
                        '-T %i' % toys_per_cycle,
                        '-s %i' % idx
                    ] + self.passthru)
                    self.job_queue.append(cmd)

        print(">> %i/%i points have completed and require no further toys" % (
            complete_points, total_points))
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add graphs with whether or not the point passes
        if self.args.output:
            fout = ROOT.TFile(outfile, 'RECREATE')

            def write_graph(graph_name, values):
                """Write `values` over the (x, y) grid as a named TGraph2D."""
                graph = ROOT.TGraph2D(len(values), array('d', output_x),
                                      array('d', output_y),
                                      array('d', values))
                graph.SetName(graph_name)
                fout.WriteTObject(graph, graph_name)

            for c in contours:
                write_graph(c, output_data[c])
                # Also write a Graph with the CLsErr
                write_graph('clsErr_' + c, output_clserr[c])
                # And a Graph with the significance
                write_graph('signif_' + c, output_signif[c])
            # The object name must match the key it is written under; it
            # used to pick up the last contour name from the loop above.
            write_graph('ntoys', output_ntoys)
            fout.Close()
Example #11
0
    def run_method(self):
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids = cfg['grids']
        grids_to_remove = cfg.get('grids_to_remove', None)
        POIs = cfg['POIs']
        opts = cfg['opts']
        toys_per_cycle = cfg['toys_per_cycle']
        zipname = cfg.get('zipfile', None)
        statfile = cfg.get('statusfile', None)
        contours = cfg.get('contours',
                           ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys = cfg.get('min_toys', 500)
        max_toys = cfg.get('max_toys', 5000)
        signif = cfg.get('signif', 3.0)
        cl = cfg.get('CL', 0.95)
        verbose = cfg.get('verbose', False)
        make_plots = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass validation
        incomplete = cfg.get('output_incomplete', False)
        outfile = cfg.get('output', 'hybrid_grid.root')
        from_asymptotic_settings = cfg.get('from_asymptotic_settings', dict())
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None: to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None: to_freeze.append(freeze_opt)
        if hasattr(self.args, 'setPhysicsModelParameters'
                   ) and self.args.setPhysicsModelParameters is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if hasattr(
                self.args,
                'freezeNuisances') and self.args.freezeNuisances is not None:
            to_freeze.append(self.args.freezeNuisances)

        points = []
        blacklisted_points = []

        # For the automatic grid for the "from_asymptotic" option we should fix the format specifier for
        # the grid points, as the numerical precision of a given point may change once the grid spacing is
        # modified. By default, however, we let split_vals do its thing.
        fmt_spec = None

        # In this mode we're doing a classic limit search vs MH instead of a 2D grid.
        # Most of the same code can be used however. First we'll use the json file containing the
        # asymptotic limits to create a new grid from scratch.
        if self.args.from_asymptotic is not None:
            grids = []
            bound_vals = None
            bound_pars = []
            fmt_spec = '%.5g'
            with open(self.args.from_asymptotic) as limit_json:
                limits = json.load(limit_json)
            for m in limits.keys():
                limit_vals = [x for x in limits[m].values()]
                max_limit = max(limit_vals)
                min_limit = min(limit_vals)
                # print (min_limit, max_limit)
                width = max_limit - min_limit
                max_limit += width * 0.3
                min_limit = max(0.0, min_limit - width * 0.3)
                nsteps = from_asymptotic_settings.get('points', 100)
                step_width = (max_limit - min_limit) / nsteps
                grids.append(
                    [m, '%g:%g|%g' % (min_limit, max_limit, step_width), ''])
                boundlist_file = from_asymptotic_settings.get('boundlist', '')
                if boundlist_file:
                    with open(boundlist_file) as json_file:
                        bnd = json.load(json_file)
                    bound_pars = list(bnd.keys())
                    print 'Found bounds for parameters %s' % ','.join(
                        bound_pars)
                    bound_vals = {}
                    for par in bound_pars:
                        bound_vals[par] = list()
                        for mass, bounds in bnd[par].iteritems():
                            bound_vals[par].append(
                                (float(mass), bounds[0], bounds[1]))
                        bound_vals[par].sort(key=lambda x: x[0])
                # print (min_limit, max_limit)
            # sys.exit(0)

        for igrid in grids:
            assert (len(igrid) == 3)
            if igrid[2] == '':
                points.extend(
                    itertools.product(
                        utils.split_vals(igrid[0], fmt_spec=fmt_spec),
                        utils.split_vals(igrid[1], fmt_spec=fmt_spec)))
            else:
                blacklisted_points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1]),
                                      utils.split_vals(igrid[2])))

        #In between cycles of toys we may find there's something wrong with some of the points in the grid and therefore want to remove them:
        points_to_remove = []
        if grids_to_remove is not None:
            for igrid in grids_to_remove:
                assert (len(igrid) == 2)
                points_to_remove.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1])))

        for p in points_to_remove:
            points.remove(p)

        # This dictionary will keep track of the combine output files for each model point
        file_dict = {}
        for p in points:
            file_dict[p] = {}

        # The regex we will use to identify output files and extract POI values
        rgx = re.compile(
            'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
            % (POIs[0], POIs[1]))

        stats = {}
        if statfile and os.path.isfile(statfile):
            with open(statfile) as stat_json:
                stats = json.load(stat_json)

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory
        if zipname:
            # Open the zip file in append mode, this should also
            # create it if it doesn't exist
            zipf = zipfile.ZipFile(zipname, 'a')
            for f in zipf.namelist():
                matches = rgx.search(f)
                p = (matches.group('p1'), matches.group('p2'))
                seed = int(matches.group('toy'))
                if p in file_dict:
                    if seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on the seed number
                        # with a value pointing to the file in the archive in the format
                        # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname + '#' + f

        # Now look for files in the local directory
        for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' %
                           (POIs[0], POIs[1])):
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                # Don't add this file to the list if its seed number is already
                # a value in the dict.
                if seed not in file_dict[p]:
                    # If we're using the zipfile we'll add this now and
                    # then delete it from the local directory
                    # But: only if the file is good, we don't want to pollute the zip
                    # file with incomplete or failed jobs
                    if zipname and plot.TFileIsGood(f):
                        zipf.write(f)  # assume this throws if it fails
                        print 'Adding %s to %s' % (f, zipname)
                        file_dict[p][seed] = zipname + '#' + f
                        os.remove(f)
                    else:  # otherwise just add the file to the dict in the normal way
                        file_dict[p][seed] = f

        if zipname:
            zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_data = {}
        output_ntoys = []
        output_clserr = {}
        output_signif = {}
        # One list of Z-values per contour
        for contour in contours:
            output_data[contour] = []
            output_clserr[contour] = []
            output_signif[contour] = []

        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key, val in file_dict.iteritems():
            status_changed = True
            total_points += 1
            status_key = ':'.join(key)
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])

            # First check if we use the status json
            all_files = val.values()
            status_files = []
            files = []

            if status_key in stats:
                status_files = stats[status_key]['files']
                if set(all_files) == set(status_files):
                    print 'For point %s, no files have been updated' % name
                    status_changed = False
                    files = all_files
                else:
                    files = [x for x in val.values() if plot.TFileIsGood(x)]
                    if set(files) == set(
                            status_files) and len(files) < len(all_files):
                        print 'For point %s, new files exist but they are not declared good' % name
                        status_changed = False
            else:
                files = [x for x in val.values() if plot.TFileIsGood(x)]

            # Merge the HypoTestResult objects from each file into one
            res = None
            precomputed = None
            if status_key in stats and not status_changed and stats[
                    status_key]["ntoys"] > 0:
                precomputed = stats[status_key]
            else:
                res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point
            #
            ok, point_res = self.ValidateHypoTest(res,
                                                  min_toys=min_toys,
                                                  max_toys=max_toys,
                                                  contours=contours,
                                                  signif=signif,
                                                  cl=cl,
                                                  output=self.args.output,
                                                  verbose=verbose,
                                                  precomputed=precomputed)

            print '>> Point %s [%i toys, %s]' % (
                name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

            stats[status_key] = {'files': files, 'ntoys': point_res['ntoys']}
            for cont in contours:
                if cont in point_res:
                    stats[status_key][cont] = point_res[cont]

            if ok:
                complete_points += 1

            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res,
                                  'plot_' + name,
                                  opts=cfg['plot_settings'],
                                  poi_vals=(float(key[0]), float(key[1])),
                                  point_info=point_res)

            # Add the resulting CLs values to the output arrays. Normally just
            # for the model points that passed the validation criteria, but if "output_incomplete"
            # has been set to true then we'll write all model points where at least one HypoTestResult
            # is present
            if (res is not None or precomputed
                    is not None) and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])

            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print '>>> Going to generate %i job(s) for point %s' % (
                    self.args.cycles, key)
                # Figure out the next seed numbers we need to run by finding the maximum seed number
                # so far
                done_cycles = val.keys()
                new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1
                new_cycles = range(new_idx, new_idx + self.args.cycles)

                print '>>> Done cycles: ' + ','.join(
                    str(x) for x in done_cycles)
                print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

                # Build the combine command. Here we'll take responsibility for setting the name and the
                # model parameters, making sure the latter are frozen
                set_arg = ','.join(
                    ['%s=%s,%s=%s' %
                     (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] +
                                      to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                    name, set_arg, freeze_arg)
                if self.args.from_asymptotic:
                    mval = key[0]
                    command = []
                    for par in bound_pars:
                        # The (mass, None, None) is just a trick to make bisect_left do the comparison
                        # with the list of tuples in bound_vals[par]. The +1E-5 is to avoid float rounding
                        # issues
                        lower_bound = bisect.bisect_left(
                            bound_vals[par], (float(mval) + 1E-5, None, None))
                        # If lower_bound == 0 this means we are at or below the lowest mass point,
                        # in which case we should increase by one to take the bounds from this lowest
                        # point
                        if lower_bound == 0:
                            lower_bound += 1
                        command.append(
                            '%s=%g,%g' %
                            (par, bound_vals[par][lower_bound - 1][1],
                             bound_vals[par][lower_bound - 1][2]))
                    if len(command) > 0:
                        point_args += (' --setPhysicsModelParameterRanges %s' %
                                       (':'.join(command)))
                    # print per_mass_point_args
                    point_args += ' --singlePoint %s' % key[1]
                    point_args += ' -m %s' % mval
                # Build a command for each job cycle setting the number of toys and random seed and passing through any other
                # user options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join([
                        'combine -M HybridNew', opts, point_args,
                        '-T %i' % toys_per_cycle,
                        '-s %i' % idx
                    ] + self.passthru)
                    self.job_queue.append(cmd)

        print ">> %i/%i points have completed and require no further toys" % (
            complete_points, total_points)
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
        if self.args.output and not self.args.from_asymptotic:
            fout = ROOT.TFile(outfile, 'RECREATE')
            for c in contours:
                graph = ROOT.TGraph2D(len(output_data[c]),
                                      array('d',
                                            output_x), array('d', output_y),
                                      array('d', output_data[c]))
                graph.SetName(c)
                fout.WriteTObject(graph, c)
                # Also write a Graph with the CLsErr
                graph = ROOT.TGraph2D(len(output_clserr[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_clserr[c]))
                graph.SetName('clsErr_' + c)
                fout.WriteTObject(graph, 'clsErr_' + c)
                # And a Graph with the significance
                graph = ROOT.TGraph2D(len(output_signif[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_signif[c]))
                graph.SetName('signif_' + c)
                fout.WriteTObject(graph, 'signif_' + c)
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_ntoys))
            graph.SetName('ntoys' + c)
            fout.WriteTObject(graph, 'ntoys')
            fout.Close()

        if self.args.output and self.args.from_asymptotic:
            # Need to collect all the files for each mass point and hadd them:
            files_by_mass = {}
            for key, val in file_dict.iteritems():
                if key[0] not in files_by_mass:
                    files_by_mass[key[0]] = list()
                files_by_mass[key[0]].extend(val.values())
            for m, files in files_by_mass.iteritems():
                gridfile = 'higgsCombine.gridfile.%s.%s.%s.root' % (POIs[0], m,
                                                                    POIs[1])
                self.job_queue.append('hadd -f %s %s' %
                                      (gridfile, ' '.join(files)))
                for exp in ['', '0.025', '0.160', '0.500', '0.840', '0.975']:
                    self.job_queue.append(' '.join([
                        'combine -M HybridNew --rAbsAcc 0', opts,
                        '--grid %s' % gridfile,
                        '-n .final.%s.%s.%s' % (POIs[0], m, POIs[1]),
                        '-m %s' % (m), ('--expectedFromGrid %s' %
                                        exp) if exp else '--noUpdateGrid'
                    ] + self.passthru))
                self.flush_queue()

        if statfile:
            with open(statfile, 'w') as stat_out:
                stat_json = json.dumps(stats,
                                       sort_keys=True,
                                       indent=2,
                                       separators=(',', ': '))
                stat_out.write(stat_json)