Python TFileIsGood Examples

Programming Language: Python

Namespace/Package Name: CombineHarvester.CombineTools.plotting

Method/Function: TFileIsGood

Examples at hotexamples.com: 11

Python TFileIsGood - 11 examples found. These are the top-rated real-world Python examples of CombineHarvester.CombineTools.plotting.TFileIsGood, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: plot1DScan.py Project: senka/HTT_20162017

def read(scan, param, files, chop, remove_near_min, rezero,
         remove_delta=None, improve=False, remove_dups=True):
    """Read a 1D scan from ``files`` and return it as a processed graph.

    The 'limit' trees of every file accepted by ``plot.TFileIsGood`` are
    chained, and a graph of ``2*DELTANLL`` against ``param`` is built from the
    entries with ``quantileExpected > -1.5``.  The graph is named ``scan``,
    sorted, and then handed to the ``plot`` helpers below in a fixed order.

    Module-level names read here: ``DELTANLL``, ``NPX``, ``FILTER`` and
    ``REMOVE_X_RANGES``; ``NAMECOUNTER`` is incremented when ``improve`` is
    true.

    Args:
        scan: name given to the returned graph.
        param: tree expression used for the x values.
        files: input file names; those rejected by ``plot.TFileIsGood`` are
            dropped.
        chop: forwarded to ``plot.RemoveGraphYAbove``.
        remove_near_min: forwarded to ``plot.RemoveNearMin``; the call is
            skipped when this is None.
        rezero: forwarded to ``plot.ReZeroTGraph``.
        remove_delta: forwarded to ``plot.RemoveSmallDelta``; the call is
            skipped when this is None.
        improve: when true, build a TSpline3 over the graph, wrap it in a TF1
            and pass it to ``plot.ImproveMinimum``.
        remove_dups: when true, call ``plot.RemoveGraphXDuplicates``.

    Returns:
        The processed graph.
    """
    global NAMECOUNTER

    usable = []
    for path in files:
        if plot.TFileIsGood(path):
            usable.append(path)

    chain = plot.MakeTChain(usable, 'limit')
    y_expr = '2*%s' % DELTANLL
    graph = plot.TGraphFromTree(chain, param, y_expr,
                                'quantileExpected > -1.5')
    graph.SetName(scan)
    graph.Sort()

    if remove_dups:
        plot.RemoveGraphXDuplicates(graph)
    if remove_delta is not None:
        plot.RemoveSmallDelta(graph, remove_delta)
    plot.RemoveGraphYAbove(graph, chop)
    plot.ReZeroTGraph(graph, rezero)
    if remove_near_min is not None:
        plot.RemoveNearMin(graph, remove_near_min)

    if improve:
        spline = ROOT.TSpline3("spline3", graph)
        # Each TF1 gets a distinct name built from the running counter
        fn_name = 'splinefn' + str(NAMECOUNTER)
        x_first = graph.GetX()[0]
        x_last = graph.GetX()[graph.GetN() - 1]
        func = ROOT.TF1(fn_name, partial(Eval, spline), x_first, x_last, 1)
        func.SetNpx(NPX)
        NAMECOUNTER += 1
        plot.ImproveMinimum(graph, func, True)

    if FILTER is not None:
        plot.FilterGraph(graph, FILTER)
    if REMOVE_X_RANGES is not None:
        for x_range in REMOVE_X_RANGES:
            plot.RemoveInXRange(graph, x_range[0], x_range[1])
    return graph

Example #2

Show file

File: plot1DScan.py Project: steggema/CombineHarvester

def read(scan,
         param,
         files,
         chop,
         remove_near_min,
         rezero,
         remove_delta=None,
         improve=False):
    """Read a 1D scan from ``files`` and return it as a processed graph.

    The 'limit' trees of every file accepted by ``plot.TFileIsGood`` are
    chained and a graph of ``2*deltaNLL`` against ``param`` is built.  The
    graph is named ``scan``, sorted, stripped of duplicate x values and then
    handed to the ``plot`` helpers below in a fixed order.  The module-level
    ``NAMECOUNTER`` is incremented when ``improve`` is true.

    Args:
        scan: name given to the returned graph.
        param: tree expression used for the x values.
        files: input file names; those rejected by ``plot.TFileIsGood`` are
            dropped.
        chop: forwarded to ``plot.RemoveGraphYAbove``.
        remove_near_min: forwarded to ``plot.RemoveNearMin``; the call is
            skipped when this is None.
        rezero: forwarded to ``plot.ReZeroTGraph``.
        remove_delta: forwarded to ``plot.RemoveSmallDelta``; the call is
            skipped when this is None.
        improve: when true, build a TSpline3 over the graph, wrap it in a TF1
            and pass it to ``plot.ImproveMinimum``.

    Returns:
        The processed graph.
    """
    global NAMECOUNTER

    usable = []
    for path in files:
        if plot.TFileIsGood(path):
            usable.append(path)
    chain = plot.MakeTChain(usable, 'limit')

    # require quantileExpected > -0.5 to avoid the final point which is always
    # committed twice (even if the fit fails)
    selection = 'quantileExpected > -0.5'
    graph = plot.TGraphFromTree(chain, param, '2*deltaNLL', selection)
    graph.SetName(scan)
    graph.Sort()
    plot.RemoveGraphXDuplicates(graph)

    if remove_delta is not None:
        plot.RemoveSmallDelta(graph, remove_delta)
    plot.RemoveGraphYAbove(graph, chop)
    plot.ReZeroTGraph(graph, rezero)
    if remove_near_min is not None:
        plot.RemoveNearMin(graph, remove_near_min)

    if improve:
        spline = ROOT.TSpline3("spline3", graph)
        # Each TF1 gets a distinct name built from the running counter
        fn_name = 'splinefn' + str(NAMECOUNTER)
        x_first = graph.GetX()[0]
        x_last = graph.GetX()[graph.GetN() - 1]
        func = ROOT.TF1(fn_name, partial(Eval, spline), x_first, x_last, 1)
        NAMECOUNTER += 1
        plot.ImproveMinimum(graph, func, True)
    return graph

Example #3

Show file

def read(scan, param, files, ycut):
    """Return the ``2*deltaNLL`` versus ``param`` graph read from ``files``.

    Files rejected by ``plot.TFileIsGood`` are dropped; the 'limit' trees of
    the rest are chained and the entries with ``quantileExpected > -1.5`` are
    turned into a graph.  The graph is named ``scan``, sorted, stripped of
    duplicate x values and passed to ``plot.RemoveGraphYAbove`` with ``ycut``.
    """
    usable = []
    for path in files:
        if plot.TFileIsGood(path):
            usable.append(path)
    chain = plot.MakeTChain(usable, 'limit')

    selection = 'quantileExpected > -1.5'
    graph = plot.TGraphFromTree(chain, param, '2*deltaNLL', selection)
    graph.SetName(scan)
    graph.Sort()
    plot.RemoveGraphXDuplicates(graph)
    plot.RemoveGraphYAbove(graph, ycut)
    return graph

Example #4

Show file

    def run_method(self):
        """Collect limits from the input files and write them out as JSON.

        Input files (``self.args.input``) are grouped under a label: always
        'default' unless ``self.args.use_dirs`` is set, in which case the
        label is taken from the file's directory path.  For every group the
        'limit' tree of each file is read and the observed / expected limits
        are stored per ``mh`` value.  When ``self.args.output`` is set the
        result is written as JSON; with ``use_dirs`` the label is inserted
        before the '.json' suffix of the output name.
        """
        # (quantileExpected value, output key) pairs for the expected limits,
        # checked in this order with a tolerance of 1E-4
        expected_keys = (
            (0.5, "exp0"),
            (0.025, "exp-2"),
            (0.160, "exp-1"),
            (0.840, "exp+1"),
            (0.975, "exp+2"),
        )

        limit_sets = defaultdict(list)
        for filename in self.args.input:
            if not plot.TFileIsGood(filename):
                print('>> File %s is corrupt or incomplete, skipping' % filename)
                # Really skip the file: without this it was still queued and
                # opened further down despite the message above
                continue
            if self.args.use_dirs is False:
                limit_sets['default'].append(filename)
            else:
                label = 'default'
                dirs = filename.split('/')
                # The last dir could be the mass, if so we ignore it and check the next
                if len(dirs) > 1:
                    if not isfloat(dirs[-2]):
                        label = dirs[-2]
                    elif len(dirs) > 2:
                        label = dirs[-3]
                limit_sets[label].append(filename)

        for label, filenames in limit_sets.iteritems():
            js_out = {}
            for filename in filenames:
                root_file = ROOT.TFile(filename)
                tree = root_file.Get('limit')
                for evt in tree:
                    mh = str(evt.mh)
                    if mh not in js_out:
                        js_out[mh] = {}
                    if evt.quantileExpected == -1:
                        js_out[mh]['obs'] = evt.limit
                        continue
                    for quantile, key in expected_keys:
                        if abs(evt.quantileExpected - quantile) < 1E-4:
                            js_out[mh][key] = evt.limit
                            break

            jsondata = json.dumps(js_out,
                                  sort_keys=True,
                                  indent=2,
                                  separators=(',', ': '))
            if self.args.output is not None:
                if self.args.use_dirs:
                    outname = self.args.output.replace('.json',
                                                       '_%s.json' % label)
                else:
                    outname = self.args.output
                with open(outname, 'w') as out_file:
                    print('>> Writing output %s from files:' % outname)
                    pprint.pprint(filenames, indent=2)
                    out_file.write(jsondata)

Example #5

Show file

File: generic2D.py Project: senka/HTT_20162017

def read(scan, param_x, param_y, file):
    """Read a 2D scan from a single file.

    Builds a 2D graph of ``2*deltaNLL`` over (``param_x``, ``param_y``) from
    the 'limit' tree entries with ``quantileExpected > -0.5 && deltaNLL > 0``
    and a 1D graph of ``param_y`` against ``param_x`` from the entries with
    ``deltaNLL == 0``.  After duplicate x values are removed the latter must
    contain exactly one point, otherwise an AssertionError is raised.

    Returns:
        A ``(graph, best)`` tuple, named ``scan`` and ``scan + '_best'``.
    """
    # An input rejected by TFileIsGood leaves the chain without any file
    usable = [file] if plot.TFileIsGood(file) else []
    chain = plot.MakeTChain(usable, 'limit')

    surface = plot.TGraph2DFromTree(
        chain, param_x, param_y, '2*deltaNLL',
        'quantileExpected > -0.5 && deltaNLL > 0')
    best = plot.TGraphFromTree(
        chain, param_x, param_y,
        'quantileExpected > -0.5 && deltaNLL == 0')
    plot.RemoveGraphXDuplicates(best)
    assert best.GetN() == 1

    surface.SetName(scan)
    best.SetName(scan + '_best')
    return surface, best

Example #6

Show file

def read(scan, param, other_param, files, remove_dups=True):
    """Return the graph of ``other_param`` against ``param`` read from ``files``.

    Files rejected by ``plot.TFileIsGood`` are dropped; the 'limit' trees of
    the rest are chained and the entries with ``quantileExpected > -0.5`` are
    turned into a graph.  The graph is named ``scan`` and sorted; duplicate x
    values are removed unless ``remove_dups`` is false.
    """
    usable = []
    for path in files:
        if plot.TFileIsGood(path):
            usable.append(path)
    chain = plot.MakeTChain(usable, 'limit')

    selection = 'quantileExpected > -0.5'
    graph = plot.TGraphFromTree(chain, param, other_param, selection)
    graph.SetName(scan)
    graph.Sort()
    if remove_dups:
        plot.RemoveGraphXDuplicates(graph)
    return graph

Example #7

Show file

File: plot1DScanNew.py Project: senka/HTT_20162017

 def ReadScanFromTFiles(self,
                        filenames,
                        param_name,
                        tree_selection='quantileExpected > -1.5'):
     """Build the sorted ``2*deltaNLL`` versus ``param_name`` graph.

     Files rejected by ``plotting.TFileIsGood`` are dropped; the 'limit'
     trees of the rest are chained and the entries passing
     ``tree_selection`` are turned into a graph.  The graph is printed when
     ``self.verbosity`` is at least 2.

     Raises:
         RuntimeError: if none of ``filenames`` is a good file.
     """
     # TODO: should report bad files here
     usable = []
     for name in filenames:
         if plotting.TFileIsGood(name):
             usable.append(name)
     if not usable:
         raise RuntimeError('[ReadScanFromTFiles] no valid TFiles')

     chain = plotting.MakeTChain(usable, 'limit')
     graph = plotting.TGraphFromTree(chain, param_name, '2*deltaNLL',
                                     tree_selection)
     graph.Sort()
     if self.verbosity >= 2:
         print('[ReadScanFromTFiles] Produced TGraph:')
         graph.Print()
     return graph

Example #8

Show file

File: Output.py Project: senka/HTT_20162017

    def run_method(self):
        """Collect per-mass observed and toy values and write them as JSON.

        Input files (``self.args.input``) that pass ``plot.TFileIsGood`` are
        grouped under a label: 'default' unless ``self.args.use_dirs`` is
        set, in which case the label comes from the file's directory path.
        For each group the 'limit' tree of every file is read, keeping only
        entries with ``quantileExpected == -1``.  Values from entries with
        ``iToy <= 0`` are stored under 'obs', all others are appended to
        'toy'.  A "p" entry is then computed as the fraction of toy values
        that are >= the observed one.  When ``self.args.output`` is set the
        result is written as JSON; with ``use_dirs`` the label is inserted
        before the '.json' suffix of the output name.
        """
        # Each value stored below is a (filenames, toyfiles) pair of lists; the
        # defaultdict's own ``list`` factory is never relied on because every
        # key is initialised explicitly before use.
        limit_sets = defaultdict(list)
        for filename in self.args.input:
            if not plot.TFileIsGood(filename):
                print '>> File %s is corrupt or incomplete, skipping' % filename
                continue
            if not self.args.use_dirs:
                if 'default' not in limit_sets:
                    limit_sets['default'] = ([], [])
                limit_sets['default'][0].append(filename)
            else:
                label = 'default'
                dirs = filename.split('/')
                # The last dir could be the mass, if so we ignore it and check the next
                if len(dirs) > 1:
                    if not isfloat(dirs[-2]):
                        label = dirs[-2]
                    elif len(dirs) > 2:
                        label = dirs[-3]
                if label not in limit_sets:
                    limit_sets[label] = ([], [])
                limit_sets[label][0].append(filename)

        # ``toyfiles`` is unpacked here but nothing in this method fills or
        # reads it.
        for label, (filenames, toyfiles) in limit_sets.iteritems():
            js_out = {}
            for filename in filenames:
                file = ROOT.TFile(filename)
                tree = file.Get('limit')
                adding_cat_branch = False
                branches = []
                for branch in tree.GetListOfBranches():
                    # Current logic says any branch after quantileExpected is a special
                    # GOF branch labelled according to category
                    if adding_cat_branch:
                        branches.append(branch.GetName())
                    if branch.GetName() == 'quantileExpected':
                        adding_cat_branch = True
                # print branches
                for evt in tree:
                    mh = str(evt.mh)
                    if mh not in js_out:
                        js_out[mh] = {}
                    # Only entries flagged with quantileExpected == -1 are used
                    if evt.quantileExpected != -1:
                        continue
                    if branches:
                        # Per-category layout: js_out[mh][branch] = {'toy': [...], 'obs': [x]}
                        for branch in branches:
                            if branch not in js_out[mh]:
                                js_out[mh][branch] = {}
                                js_out[mh][branch]['toy'] = []
                            if evt.iToy <= 0:
                                js_out[mh][branch]['obs'] = [
                                    getattr(evt, branch)
                                ]
                            else:
                                js_out[mh][branch]['toy'].append(
                                    getattr(evt, branch))
                    else:
                        # Flat layout: js_out[mh] = {'toy': [...], 'obs': [x]}
                        if 'toy' not in js_out[mh]:
                            js_out[mh]['toy'] = []
                        if evt.iToy <= 0:
                            js_out[mh]['obs'] = [evt.limit]
                        else:
                            js_out[mh]['toy'].append(evt.limit)
            for mh in js_out:
                # Flat layout is recognised by both 'toy' and 'obs' being
                # direct keys; anything else is treated as per-category.
                # NOTE(review): both divisions below raise ZeroDivisionError
                # when the relevant 'toy' list is empty - confirm inputs
                # always contain toys.
                if all([entry in js_out[mh] for entry in ['toy', 'obs']]):
                    js_out[mh]["p"] = float(
                        len([
                            toy for toy in js_out[mh]['toy']
                            if toy >= js_out[mh]['obs'][0]
                        ])) / len(js_out[mh]['toy'])
                else:
                    # NOTE(review): a flat entry that has 'toy' but no 'obs'
                    # also lands here and would index a list with a string;
                    # a category without 'obs' raises KeyError - confirm
                    # these cases cannot occur.
                    for branch in js_out[mh]:
                        js_out[mh][branch]["p"] = float(
                            len([
                                toy for toy in js_out[mh][branch]['toy']
                                if toy >= js_out[mh][branch]['obs'][0]
                            ])) / len(js_out[mh][branch]['toy'])

            # print js_out
            jsondata = json.dumps(js_out,
                                  sort_keys=True,
                                  indent=2,
                                  separators=(',', ': '))
            # print jsondata
            if self.args.output is not None:
                # With use_dirs each label gets its own file: foo.json -> foo_<label>.json
                outname = self.args.output.replace(
                    '.json', '_%s.json' %
                    label) if self.args.use_dirs else self.args.output
                with open(outname, 'w') as out_file:
                    print '>> Writing output %s from files:' % outname
                    pprint.pprint(filenames, indent=2)
                    out_file.write(jsondata)

Example #9

Show file

File: Output.py Project: senka/HTT_20162017

    def run_method(self):
        """Collect limits (optionally from toys) and write them out as JSON.

        Input files (``self.args.input``) that pass ``plot.TFileIsGood`` are
        grouped under a label: 'default' unless ``self.args.use_dirs`` is
        set, in which case the label comes from the file's directory path.
        For each group the 'limit' tree of every file is read and the values
        are stored per ``mh``:

        * without ``self.args.toys`` the observed / expected limits are stored
          directly under 'obs', 'exp0', 'exp-2', ... and, when
          ``self.args.limit_err`` is set, their errors under '<key>_err';
        * with ``self.args.toys`` entries with ``iToy > 0`` are appended to
          the lists in ``js_out[mh]['toys']``, the ``iToy == 0`` observed
          entry is stored under 'obs', and the expected bands are afterwards
          computed as quantiles of the toy 'obs' values.

        When ``self.args.output`` is set the result is written as JSON; with
        ``use_dirs`` the label is inserted before the '.json' suffix.
        """
        # (quantileExpected value, output key) pairs for the expected limits,
        # checked in this order with a tolerance of 1E-4
        expected_keys = (
            (0.5, 'exp0'),
            (0.025, 'exp-2'),
            (0.160, 'exp-1'),
            (0.840, 'exp+1'),
            (0.975, 'exp+2'),
        )

        def limit_key(quantile):
            """Return the output key for a quantileExpected value, or None."""
            if quantile == -1:
                return 'obs'
            for value, key in expected_keys:
                if abs(quantile - value) < 1E-4:
                    return key
            return None

        limit_sets = defaultdict(list)
        for filename in self.args.input:
            if not plot.TFileIsGood(filename):
                print('>> File %s is corrupt or incomplete, skipping' % filename)
                continue
            if self.args.use_dirs is False:
                limit_sets['default'].append(filename)
            else:
                label = 'default'
                dirs = filename.split('/')
                # The last dir could be the mass, if so we ignore it and check the next
                if len(dirs) > 1:
                    if not isfloat(dirs[-2]):
                        label = dirs[-2]
                    elif len(dirs) > 2:
                        label = dirs[-3]
                limit_sets[label].append(filename)

        for label, filenames in limit_sets.iteritems():
            js_out = {}
            for filename in filenames:
                if not plot.TFileIsGood(filename):
                    continue
                root_file = ROOT.TFile(filename)
                tree = root_file.Get('limit')
                for evt in tree:
                    mh = str(evt.mh)
                    if mh not in js_out:
                        js_out[mh] = {}
                        if self.args.toys:
                            js_out[mh]['toys'] = {}
                            for limit in [
                                    'obs', 'exp0', 'exp-2', 'exp-1',
                                    'exp+1', 'exp+2'
                            ]:
                                js_out[mh]['toys'][limit] = []

                    key = limit_key(evt.quantileExpected)
                    if self.args.toys:
                        if evt.iToy > 0:
                            if key is not None:
                                js_out[mh]['toys'][key].append(evt.limit)
                        elif evt.iToy == 0 and key == 'obs':
                            # The original appended to js_out[mh]['obs'], a
                            # key that is never initialised (KeyError).  Store
                            # the scalar instead, matching the non-toy branch
                            # and the exp* entries written further down.
                            js_out[mh]['obs'] = evt.limit
                    elif key is not None:
                        js_out[mh][key] = evt.limit
                        if self.args.limit_err:
                            js_out[mh][key + '_err'] = evt.limitErr

            if self.args.toys:
                for mh in js_out.keys():
                    print("Expected bands will be taken from toys")
                    print(mh)
                    limits = sorted(js_out[mh]['toys']['obs'])
                    # Quantiles of the toy 'obs' values give the expected bands
                    quantiles = array('d', [0.025, 0.160, 0.5, 0.840, 0.975])
                    res = array('d', [0., 0., 0., 0., 0.])
                    empty = array('i', [0])
                    ROOT.TMath.Quantiles(len(limits), len(quantiles),
                                         array('d', limits), res, quantiles,
                                         True, empty, 1)
                    print(res)
                    js_out[mh]['exp-2'] = res[0]
                    js_out[mh]['exp-1'] = res[1]
                    js_out[mh]['exp0'] = res[2]
                    js_out[mh]['exp+1'] = res[3]
                    js_out[mh]['exp+2'] = res[4]

            jsondata = json.dumps(js_out,
                                  sort_keys=True,
                                  indent=2,
                                  separators=(',', ': '))
            if self.args.output is not None:
                if self.args.use_dirs:
                    outname = self.args.output.replace('.json',
                                                       '_%s.json' % label)
                else:
                    outname = self.args.output
                with open(outname, 'w') as out_file:
                    print('>> Writing output %s from files:' % outname)
                    pprint.pprint(filenames, indent=2)
                    out_file.write(jsondata)

Example #10

Show file

File: LimitGrids.py Project: steggema/CombineHarvester

    def run_method(self):
        """Collect HybridNew grid outputs, validate them and queue more jobs.

        Reads the JSON config named by ``self.args.config`` and builds the
        list of model points from its 'grids' entry.  Existing combine output
        files are located per point and seed, first in the optional zip
        archive ('zipfile' in the config) and then in the current directory.
        For each point the good files are merged via
        ``self.GetCombinedHypoTest`` and checked with
        ``self.ValidateHypoTest``.  Points that do not pass get
        ``self.args.cycles`` further ``combine -M HybridNew`` commands
        appended to ``self.job_queue``.  When ``self.args.output`` is set the
        collected values are written as TGraph2D objects to the file named by
        the config's 'output' entry.
        """
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids = cfg['grids']
        POIs = cfg['POIs']
        opts = cfg['opts']
        toys_per_cycle = cfg['toys_per_cycle']
        zipname = cfg.get('zipfile', None)
        contours = cfg.get('contours',
                           ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys = cfg.get('min_toys', 500)
        max_toys = cfg.get('max_toys', 5000)
        signif = cfg.get('signif', 3.0)
        cl = cfg.get('CL', 0.95)
        verbose = cfg.get('verbose', False)
        make_plots = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass validation
        incomplete = cfg.get('output_incomplete', False)
        outfile = cfg.get('output', 'hybrid_grid.root')
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None:
            to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None:
            to_freeze.append(freeze_opt)
        if hasattr(self.args, 'setPhysicsModelParameters'
                   ) and self.args.setPhysicsModelParameters is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if hasattr(
                self.args,
                'freezeNuisances') and self.args.freezeNuisances is not None:
            to_freeze.append(self.args.freezeNuisances)

        points = []
        blacklisted_points = []
        for igrid in grids:
            assert (len(igrid) == 3)
            if igrid[2] == '':
                points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1])))
            else:
                blacklisted_points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1]),
                                      utils.split_vals(igrid[2])))

        # This dictionary will keep track of the combine output files for each model point
        file_dict = {}
        for p in points:
            file_dict[p] = {}

        # The regex we will use to identify output files and extract POI values
        # (raw string so the backslashes reach the regex engine untouched)
        rgx = re.compile(
            r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
            % (POIs[0], POIs[1]))

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory
        if zipname:
            # Open the zip file in append mode, this should also
            # create it if it doesn't exist
            zipf = zipfile.ZipFile(zipname, 'a')
            for f in zipf.namelist():
                matches = rgx.search(f)
                if matches is None:
                    # Archive member that is not one of our output files
                    continue
                p = (matches.group('p1'), matches.group('p2'))
                seed = int(matches.group('toy'))
                if p in file_dict:
                    if seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on the seed number
                        # with a value pointing to the file in the archive in the format
                        # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname + '#' + f

        # Now look for files in the local directory
        for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' %
                           (POIs[0], POIs[1])):
            matches = rgx.search(f)
            if matches is None:
                # The glob is looser than the regex (e.g. no seed field in
                # the name); ignore files the regex cannot parse
                continue
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                # Don't add this file to the list if its seed number is already
                # a value in the dict.
                if seed not in file_dict[p]:
                    # If we're using the zipfile we'll add this now and
                    # then delete it from the local directory
                    # But: only if the file is good, we don't want to pollute the zip
                    # file with incomplete or failed jobs
                    if zipname and plot.TFileIsGood(f):
                        zipf.write(f)  # assume this throws if it fails
                        print('Adding %s to %s' % (f, zipname))
                        file_dict[p][seed] = zipname + '#' + f
                        os.remove(f)
                    else:  # otherwise just add the file to the dict in the normal way
                        file_dict[p][seed] = f

        if zipname:
            zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_data = {}
        output_ntoys = []
        output_clserr = {}
        output_signif = {}
        # One list of Z-values per contour
        for contour in contours:
            output_data[contour] = []
            output_clserr[contour] = []
            output_signif[contour] = []

        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key, val in file_dict.iteritems():
            total_points += 1
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
            files = [x for x in val.values() if plot.TFileIsGood(x)]
            # Merge the HypoTestResult objects from each file into one
            res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point; a point without any
            # result counts as incomplete with zero toys
            if res is not None:
                ok, point_res = self.ValidateHypoTest(
                    res,
                    min_toys=min_toys,
                    max_toys=max_toys,
                    contours=contours,
                    signif=signif,
                    cl=cl,
                    output=self.args.output,
                    verbose=verbose)
            else:
                ok, point_res = False, {"ntoys": 0}

            print('>> Point %s [%i toys, %s]' % (
                name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE'))

            if ok:
                complete_points += 1

            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res,
                                  'plot_' + name,
                                  opts=cfg['plot_settings'],
                                  poi_vals=(float(key[0]), float(key[1])))

            # Add the resulting CLs values to the output arrays. Normally just
            # for the model points that passed the validation criteria, but if "output_incomplete"
            # has been set to true then we'll write all model points where at least one HypoTestResult
            # is present
            if res is not None and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])

            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print('>>> Going to generate %i job(s) for point %s' % (
                    self.args.cycles, key))
                # Figure out the next seed numbers we need to run by finding the maximum seed number
                # so far
                done_cycles = val.keys()
                new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1
                new_cycles = range(new_idx, new_idx + self.args.cycles)

                print('>>> Done cycles: ' + ','.join(
                    str(x) for x in done_cycles))
                print('>>> New cycles: ' + ','.join(str(x) for x in new_cycles))

                # Build the combine command. Here we'll take responsibility for setting the name and the
                # model parameters, making sure the latter are frozen
                set_arg = ','.join(
                    ['%s=%s,%s=%s' %
                     (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] +
                                      to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                    name, set_arg, freeze_arg)
                # Build a command for each job cycle setting the number of toys and random seed and passing through any other
                # user options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join([
                        'combine -M HybridNew', opts, point_args,
                        '-T %i' % toys_per_cycle,
                        '-s %i' % idx
                    ] + self.passthru)
                    self.job_queue.append(cmd)

        print(">> %i/%i points have completed and require no further toys" % (
            complete_points, total_points))
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
        if self.args.output:
            fout = ROOT.TFile(outfile, 'RECREATE')
            for c in contours:
                graph = ROOT.TGraph2D(len(output_data[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_data[c]))
                graph.SetName(c)
                fout.WriteTObject(graph, c)
                # Also write a Graph with the CLsErr
                graph = ROOT.TGraph2D(len(output_clserr[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_clserr[c]))
                graph.SetName('clsErr_' + c)
                fout.WriteTObject(graph, 'clsErr_' + c)
                # And a Graph with the significance
                graph = ROOT.TGraph2D(len(output_signif[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_signif[c]))
                graph.SetName('signif_' + c)
                fout.WriteTObject(graph, 'signif_' + c)
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_ntoys))
            # Name the graph after the key it is written under.  The original
            # appended the leftover contour loop variable, which gave a
            # mismatched name and a NameError when ``contours`` was empty.
            graph.SetName('ntoys')
            fout.WriteTObject(graph, 'ntoys')
            fout.Close()

Example #11

Show file

    def run_method(self):
        """Collect HybridNew toy results on a grid of points and queue more toys.

        Steps, in order:

        1. Read the JSON config named by ``self.args.config``.
        2. Build the list of model points, either from the ``grids`` entry of
           the config or, when ``self.args.from_asymptotic`` is set, from a
           JSON file of asymptotic limits (one scan range per key).
        3. Locate ``higgsCombine.<POI0>.<v0>.<POI1>.<v1>.HybridNew.mH*.<seed>.root``
           files, first in the optional zip archive and then in the current
           directory. Good local files are moved into the archive when one is
           configured.
        4. For every point, merge the results with ``self.GetCombinedHypoTest``
           and check them with ``self.ValidateHypoTest``; points found
           incomplete get ``self.args.cycles`` new ``combine -M HybridNew``
           commands appended to ``self.job_queue``.
        5. If ``self.args.output`` is set, write TGraph2Ds to the output ROOT
           file (grid mode) or queue ``hadd`` + final ``combine`` commands
           (from-asymptotic mode).
        6. Write the per-point status JSON if a status file is configured.

        File names that do not match the expected pattern (or whose seed field
        is not an integer) are ignored rather than aborting the run.
        """
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids = cfg['grids']
        grids_to_remove = cfg.get('grids_to_remove', None)
        POIs = cfg['POIs']
        opts = cfg['opts']
        toys_per_cycle = cfg['toys_per_cycle']
        zipname = cfg.get('zipfile', None)
        statfile = cfg.get('statusfile', None)
        contours = cfg.get('contours',
                           ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys = cfg.get('min_toys', 500)
        max_toys = cfg.get('max_toys', 5000)
        signif = cfg.get('signif', 3.0)
        cl = cfg.get('CL', 0.95)
        verbose = cfg.get('verbose', False)
        make_plots = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass
        # validation
        incomplete = cfg.get('output_incomplete', False)
        outfile = cfg.get('output', 'hybrid_grid.root')
        from_asymptotic_settings = cfg.get('from_asymptotic_settings', {})
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the
        # "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None:
            to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None:
            to_freeze.append(freeze_opt)
        cli_set = getattr(self.args, 'setPhysicsModelParameters', None)
        if cli_set is not None:
            to_set.append(cli_set)
        cli_freeze = getattr(self.args, 'freezeNuisances', None)
        if cli_freeze is not None:
            to_freeze.append(cli_freeze)

        points = []
        blacklisted_points = []

        # For the automatic grid for the "from_asymptotic option" we should
        # fix the format specifier for the grid points, as the numerical
        # precision of a given point may change once the grid spacing is
        # modified. By default we let split_vals do its thing however
        fmt_spec = None

        # In this mode we're doing a classic limit search vs MH instead of a
        # 2D grid. Most of the same code can be used however. First we'll use
        # the json file containing the asymptotic limits to create a new grid
        # from scratch.
        if self.args.from_asymptotic is not None:
            grids = []
            bound_vals = None
            bound_pars = []
            fmt_spec = '%.5g'
            with open(self.args.from_asymptotic) as limit_json:
                limits = json.load(limit_json)

            # The number of steps and the parameter bounds do not depend on
            # the mass point, so read them once up front instead of once per
            # mass.
            nsteps = from_asymptotic_settings.get('points', 100)
            boundlist_file = from_asymptotic_settings.get('boundlist', '')
            if boundlist_file:
                with open(boundlist_file) as json_file:
                    bnd = json.load(json_file)
                bound_pars = list(bnd.keys())
                print('Found bounds for parameters %s' % ','.join(bound_pars))
                bound_vals = {}
                for par in bound_pars:
                    # (mass, lower, upper) tuples ordered by mass so that
                    # bisect can be used on them further down
                    bound_vals[par] = sorted(
                        ((float(mass), bounds[0], bounds[1])
                         for mass, bounds in bnd[par].items()),
                        key=lambda x: x[0])

            for m in limits:
                limit_vals = list(limits[m].values())
                max_limit = max(limit_vals)
                min_limit = min(limit_vals)
                # Pad the scanned range by 30% of its width on either side,
                # never going below zero
                width = max_limit - min_limit
                max_limit += width * 0.3
                min_limit = max(0.0, min_limit - width * 0.3)
                step_width = (max_limit - min_limit) / nsteps
                grids.append(
                    [m, '%g:%g|%g' % (min_limit, max_limit, step_width), ''])

        for igrid in grids:
            assert len(igrid) == 3, 'each grid must have exactly 3 entries'
            if igrid[2] == '':
                points.extend(
                    itertools.product(
                        utils.split_vals(igrid[0], fmt_spec=fmt_spec),
                        utils.split_vals(igrid[1], fmt_spec=fmt_spec)))
            else:
                blacklisted_points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1]),
                                      utils.split_vals(igrid[2])))

        # In between cycles of toys we may find there's something wrong with
        # some of the points in the grid and therefore want to remove them
        points_to_remove = []
        if grids_to_remove is not None:
            for igrid in grids_to_remove:
                assert len(igrid) == 2, \
                    'each grid to remove must have exactly 2 entries'
                points_to_remove.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1])))

        if points_to_remove:
            # Filter instead of list.remove(): a point listed for removal that
            # is not part of the grid is simply ignored
            unwanted = set(points_to_remove)
            points = [p for p in points if p not in unwanted]

        # This dictionary will keep track of the combine output files for
        # each model point: {(p1, p2): {seed: file location}}
        file_dict = {p: {} for p in points}

        # The regex we will use to identify output files and extract POI
        # values. The POI names are escaped so they are matched literally.
        rgx = re.compile(
            r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
            % (re.escape(POIs[0]), re.escape(POIs[1])))

        def parse_output_name(fname):
            """Return ((p1, p2), seed) for an output file name, else None."""
            matches = rgx.search(fname)
            if matches is None:
                return None
            try:
                seed = int(matches.group('toy'))
            except ValueError:
                return None
            return (matches.group('p1'), matches.group('p2')), seed

        stats = {}
        if statfile and os.path.isfile(statfile):
            with open(statfile) as stat_json:
                stats = json.load(stat_json)

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory.
        # Open the zip file in append mode, this should also
        # create it if it doesn't exist
        zipf = zipfile.ZipFile(zipname, 'a') if zipname else None
        try:
            if zipf is not None:
                for f in zipf.namelist():
                    parsed = parse_output_name(f)
                    if parsed is None:
                        # Unrelated archive member
                        continue
                    p, seed = parsed
                    if p in file_dict and seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on the
                        # seed number with a value pointing to the file in
                        # the archive in the format ROOT expects:
                        # "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname + '#' + f

            # Now look for files in the local directory
            for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' %
                               (POIs[0], POIs[1])):
                parsed = parse_output_name(f)
                if parsed is None:
                    # The glob is looser than the regex (e.g. no seed field)
                    continue
                p, seed = parsed
                # Don't add this file to the list if its seed number is
                # already a key in the dict for this point.
                if p not in file_dict or seed in file_dict[p]:
                    continue
                # If we're using the zipfile we'll add this now and
                # then delete it from the local directory
                # But: only if the file is good, we don't want to pollute the
                # zip file with incomplete or failed jobs
                if zipf is not None and plot.TFileIsGood(f):
                    zipf.write(f)  # assume this throws if it fails
                    print('Adding %s to %s' % (f, zipname))
                    file_dict[p][seed] = zipname + '#' + f
                    os.remove(f)
                else:
                    # otherwise just add the file to the dict in the normal
                    # way
                    file_dict[p][seed] = f
        finally:
            # Always close the archive, even on error, so that whatever was
            # appended so far is finalised properly
            if zipf is not None:
                zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_data = {}
        output_ntoys = []
        output_clserr = {}
        output_signif = {}
        # One list of Z-values per contour
        for contour in contours:
            output_data[contour] = []
            output_clserr[contour] = []
            output_signif[contour] = []

        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key, val in file_dict.items():
            status_changed = True
            total_points += 1
            status_key = ':'.join(key)
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])

            # First check if we use the status json
            all_files = list(val.values())
            status_files = []
            files = []

            if status_key in stats:
                status_files = stats[status_key]['files']
                if set(all_files) == set(status_files):
                    print('For point %s, no files have been updated' % name)
                    status_changed = False
                    files = all_files
                else:
                    files = [x for x in all_files if plot.TFileIsGood(x)]
                    if set(files) == set(
                            status_files) and len(files) < len(all_files):
                        print(
                            'For point %s, new files exist but they are not declared good'
                            % name)
                        status_changed = False
            else:
                files = [x for x in all_files if plot.TFileIsGood(x)]

            # Merge the HypoTestResult objects from each file into one,
            # unless the status file already holds an up-to-date result
            res = None
            precomputed = None
            if status_key in stats and not status_changed and stats[
                    status_key]["ntoys"] > 0:
                precomputed = stats[status_key]
            else:
                res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point
            ok, point_res = self.ValidateHypoTest(res,
                                                  min_toys=min_toys,
                                                  max_toys=max_toys,
                                                  contours=contours,
                                                  signif=signif,
                                                  cl=cl,
                                                  output=self.args.output,
                                                  verbose=verbose,
                                                  precomputed=precomputed)

            print('>> Point %s [%i toys, %s]' % (
                name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE'))

            stats[status_key] = {'files': files, 'ntoys': point_res['ntoys']}
            for cont in contours:
                if cont in point_res:
                    stats[status_key][cont] = point_res[cont]

            if ok:
                complete_points += 1

            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res,
                                  'plot_' + name,
                                  opts=cfg['plot_settings'],
                                  poi_vals=(float(key[0]), float(key[1])),
                                  point_info=point_res)

            # Add the resulting CLs values to the output arrays. Normally just
            # for the model points that passed the validation criteria, but if
            # "output_incomplete" has been set to true then we'll write all
            # model points where at least one HypoTestResult is present
            if (res is not None or precomputed
                    is not None) and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])

            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print('>>> Going to generate %i job(s) for point %s' % (
                    self.args.cycles, key))
                # Figure out the next seed numbers we need to run by finding
                # the maximum seed number so far
                done_cycles = list(val)
                new_idx = max(done_cycles) + 1 if done_cycles else 1
                new_cycles = range(new_idx, new_idx + self.args.cycles)

                print('>>> Done cycles: ' + ','.join(
                    str(x) for x in done_cycles))
                print('>>> New cycles: ' + ','.join(
                    str(x) for x in new_cycles))

                # Build to combine command. Here we'll take responsibility for
                # setting the name and the model parameters, making sure the
                # latter are frozen
                set_arg = ','.join(
                    ['%s=%s,%s=%s' %
                     (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] +
                                      to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                    name, set_arg, freeze_arg)
                if self.args.from_asymptotic:
                    mval = key[0]
                    command = []
                    for par in bound_pars:
                        # The (mass, None, None) is just a trick to make
                        # bisect_left do the comparison with the list of
                        # tuples in bound_vals[par]. The +1E-5 is to avoid
                        # float rounding issues
                        lower_bound = bisect.bisect_left(
                            bound_vals[par], (float(mval) + 1E-5, None, None))
                        # If lower_bound == 0 this means we are at or below
                        # the lowest mass point, in which case we should
                        # increase by one to take the bounds from this lowest
                        # point
                        if lower_bound == 0:
                            lower_bound += 1
                        command.append(
                            '%s=%g,%g' %
                            (par, bound_vals[par][lower_bound - 1][1],
                             bound_vals[par][lower_bound - 1][2]))
                    if command:
                        point_args += (' --setPhysicsModelParameterRanges %s' %
                                       (':'.join(command)))
                    point_args += ' --singlePoint %s' % key[1]
                    point_args += ' -m %s' % mval
                # Build a command for each job cycle setting the number of
                # toys and random seed and passing through any other user
                # options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join([
                        'combine -M HybridNew', opts, point_args,
                        '-T %i' % toys_per_cycle,
                        '-s %i' % idx
                    ] + self.passthru)
                    self.job_queue.append(cmd)

        print(">> %i/%i points have completed and require no further toys" % (
            complete_points, total_points))
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add a graph recording whether or not each point passes
        if self.args.output and not self.args.from_asymptotic:
            fout = ROOT.TFile(outfile, 'RECREATE')
            for c in contours:
                graph = ROOT.TGraph2D(len(output_data[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_data[c]))
                graph.SetName(c)
                fout.WriteTObject(graph, c)
                # Also write a Graph with the CLsErr
                graph = ROOT.TGraph2D(len(output_clserr[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_clserr[c]))
                graph.SetName('clsErr_' + c)
                fout.WriteTObject(graph, 'clsErr_' + c)
                # And a Graph with the significance
                graph = ROOT.TGraph2D(len(output_signif[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_signif[c]))
                graph.SetName('signif_' + c)
                fout.WriteTObject(graph, 'signif_' + c)
            # The toy-count graph is not per-contour, so its name must not
            # depend on the contour loop variable
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_ntoys))
            graph.SetName('ntoys')
            fout.WriteTObject(graph, 'ntoys')
            fout.Close()

        if self.args.output and self.args.from_asymptotic:
            # Need to collect all the files for each mass point and hadd them:
            files_by_mass = {}
            for key, val in file_dict.items():
                files_by_mass.setdefault(key[0], []).extend(val.values())
            for m, files in files_by_mass.items():
                gridfile = 'higgsCombine.gridfile.%s.%s.%s.root' % (POIs[0], m,
                                                                    POIs[1])
                self.job_queue.append('hadd -f %s %s' %
                                      (gridfile, ' '.join(files)))
                # The empty entry selects the command variant without
                # --expectedFromGrid
                for exp in ['', '0.025', '0.160', '0.500', '0.840', '0.975']:
                    self.job_queue.append(' '.join([
                        'combine -M HybridNew --rAbsAcc 0', opts,
                        '--grid %s' % gridfile,
                        '-n .final.%s.%s.%s' % (POIs[0], m, POIs[1]),
                        '-m %s' % (m), ('--expectedFromGrid %s' %
                                        exp) if exp else '--noUpdateGrid'
                    ] + self.passthru))
                self.flush_queue()

        if statfile:
            with open(statfile, 'w') as stat_out:
                stat_json = json.dumps(stats,
                                       sort_keys=True,
                                       indent=2,
                                       separators=(',', ': '))
                stat_out.write(stat_json)