Example #1
    def run_method(self):
        # Put the method back in because we always take it out
        self.put_back_arg('method', '-M')

        subbed_vars = {}

        if self.args.mass is not None:
            mass_vals = utils.split_vals(self.args.mass)
            subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
            self.passthru.extend(['-m', '%(MASS)s'])

        if self.args.points is not None:
            self.passthru.extend(['--points', self.args.points])
        if (self.args.split_points is not None and
                self.args.split_points > 0 and
                self.args.points is not None):
            points = int(self.args.points)
            split = self.args.split_points
            start = 0
            ranges = []
            while (start + (split - 1)) < points:
                ranges.append((start, start + (split - 1)))
                start += split
            if start < points:
                ranges.append((start, points - 1))
            subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
            self.passthru.extend(
                ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
            self.args.name += '.POINTS.%(P_START)s.%(P_END)s'

        # can only put the name option back now because we might have modified
        # it from what the user specified
        self.put_back_arg('name', '-n')
        proto = 'combine ' + (' '.join(self.passthru))
        keys = subbed_vars.keys()
        for it in itertools.product(*subbed_vars.values()):
            subs = {}
            for i, k in enumerate(keys):
                for tuple_i, tuple_ele in enumerate(k):
                    subs[tuple_ele] = it[i][tuple_i]
            self.job_queue.append(proto % subs)
        self.flush_queue()
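
These examples all rely on the same command-templating trick: each key of subbed_vars is a tuple of placeholder names, each value is a list of tuples of the corresponding values, and itertools.product enumerates every combination to fill a %-style template. A minimal standalone sketch of the mechanism (the placeholder names and values here are made up for illustration):

    import itertools

    subbed_vars = {
        ('MASS',): [('120',), ('125',), ('130',)],
        ('P_START', 'P_END'): [(0, 9), (10, 19)],
    }
    proto = 'combine -m %(MASS)s --firstPoint %(P_START)s --lastPoint %(P_END)s'

    keys = list(subbed_vars.keys())
    for it in itertools.product(*subbed_vars.values()):
        subs = {}
        for i, k in enumerate(keys):
            for tuple_i, tuple_ele in enumerate(k):
                subs[tuple_ele] = it[i][tuple_i]
        # Prints six commands: 3 mass values x 2 point ranges
        print(proto % subs)
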
Example #2
    def run_method(self):
        # Put the method back in because we always take it out
        self.put_back_arg('method', '-M')

        subbed_vars = {}

        if self.args.mass is not None:
            mass_vals = utils.split_vals(self.args.mass)
            subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
            self.passthru.extend(['-m', '%(MASS)s'])

        if self.args.singlePoint is not None:
            single_points = utils.split_vals(self.args.singlePoint)
            subbed_vars[('SINGLEPOINT',)] = [(pval,) for pval in single_points]
            self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
            self.args.name += '.POINT.%(SINGLEPOINT)s'

        if self.args.boundlist is not None:
            # Note: this rebuilds subbed_vars from scratch so that each MASS
            # value is paired with its parameter bounds (assumes --mass was
            # given above, so mass_vals is defined)
            subbed_vars = {}
            with open(self.args.boundlist) as json_file:
                bnd = json.load(json_file)
            commands = ['' for _ in mass_vals]
            for i, mval in enumerate(mass_vals):
                for model in bnd:
                    if commands[i] != '':
                        commands[i] += ':'
                    commands[i] += model + '=0,' + str(bnd[model][mval])
            subbed_vars[('MASS', 'MODELBOUNDONE')] = [
                (mass_vals[i], commands[i]) for i in range(len(mass_vals))]
            self.passthru.extend(
                ['--setPhysicsModelParameterRanges', '%(MODELBOUNDONE)s'])

        if self.args.points is not None:
            self.passthru.extend(['--points', self.args.points])
        if (self.args.split_points is not None and
                self.args.split_points > 0 and
                self.args.points is not None):
            points = int(self.args.points)
            split = self.args.split_points
            start = 0
            ranges = []
            while (start + (split - 1)) < points:
                ranges.append((start, start + (split - 1)))
                start += split
            if start < points:
                ranges.append((start, points - 1))
            subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
            self.passthru.extend(
                ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
            self.args.name += '.POINTS.%(P_START)s.%(P_END)s'

        # can only put the name option back now because we might have modified
        # it from what the user specified
        self.put_back_arg('name', '-n')
        proto = 'combine ' + (' '.join(self.passthru))
        keys = subbed_vars.keys()
        for it in itertools.product(*subbed_vars.values()):
            subs = {}
            for i, k in enumerate(keys):
                for tuple_i, tuple_ele in enumerate(k):
                    subs[tuple_ele] = it[i][tuple_i]
            self.job_queue.append(proto % subs)
        self.flush_queue()
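
The --split-points handling is easiest to see in isolation: given the total number of scan points and a chunk size, it emits inclusive (first, last) index pairs covering 0..points-1. A sketch under that reading (split_ranges is an illustrative helper, not part of the tool):

    def split_ranges(points, split):
        """Inclusive (start, end) index pairs covering 0..points-1."""
        ranges = []
        start = 0
        while (start + (split - 1)) < points:
            ranges.append((start, start + (split - 1)))
            start += split
        if start < points:
            ranges.append((start, points - 1))
        return ranges

    # [(0, 29), (30, 59), (60, 89), (90, 99)]
    print(split_ranges(100, 30))
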
Example #3
    def run_method(self):
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids = cfg['grids']
        POIs = cfg['POIs']
        opts = cfg['opts']
        toys_per_cycle = cfg['toys_per_cycle']
        zipname = cfg.get('zipfile', None)
        contours = cfg.get('contours',
                           ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys = cfg.get('min_toys', 500)
        max_toys = cfg.get('max_toys', 5000)
        signif = cfg.get('signif', 3.0)
        cl = cfg.get('CL', 0.95)
        verbose = cfg.get('verbose', False)
        make_plots = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass validation
        incomplete = cfg.get('output_incomplete', False)
        outfile = cfg.get('output', 'hybrid_grid.root')
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None: to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None: to_freeze.append(freeze_opt)
        if getattr(self.args, 'setPhysicsModelParameters', None) is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if getattr(self.args, 'freezeNuisances', None) is not None:
            to_freeze.append(self.args.freezeNuisances)

        points = []
        blacklisted_points = []
        for igrid in grids:
            assert (len(igrid) == 3)
            if igrid[2] == '':
                points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1])))
            else:
                blacklisted_points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1]),
                                      utils.split_vals(igrid[2])))

        # This dictionary will keep track of the combine output files for each model point
        file_dict = {}
        for p in points:
            file_dict[p] = {}

        # The regex we will use to identify output files and extract POI values
        rgx = re.compile(
            r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
            % (POIs[0], POIs[1]))

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory
        if zipname:
            # Open the zip file in append mode, this should also
            # create it if it doesn't exist
            zipf = zipfile.ZipFile(zipname, 'a')
            for f in zipf.namelist():
                matches = rgx.search(f)
                p = (matches.group('p1'), matches.group('p2'))
                seed = int(matches.group('toy'))
                if p in file_dict:
                    if seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on the seed number
                        # with a value pointing to the file in the archive in the format
                        # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname + '#' + f

        # Now look for files in the local directory
        for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' %
                           (POIs[0], POIs[1])):
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                # Don't add this file to the list if its seed number is already
                # a value in the dict.
                if seed not in file_dict[p]:
                    # If we're using the zipfile we'll add this now and
                    # then delete it from the local directory
                    # But only if the file is good: we don't want to pollute the zip
                    # file with incomplete or failed jobs
                    if zipname and plot.TFileIsGood(f):
                        zipf.write(f)  # assume this throws if it fails
                        print 'Adding %s to %s' % (f, zipname)
                        file_dict[p][seed] = zipname + '#' + f
                        os.remove(f)
                    else:  # otherwise just add the file to the dict in the normal way
                        file_dict[p][seed] = f

        if zipname:
            zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_data = {}
        output_ntoys = []
        output_clserr = {}
        output_signif = {}
        # One list of Z-values per contour
        for contour in contours:
            output_data[contour] = []
            output_clserr[contour] = []
            output_signif[contour] = []

        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key, val in file_dict.iteritems():
            total_points += 1
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
            files = [x for x in val.values() if plot.TFileIsGood(x)]
            # Merge the HypoTestResult objects from each file into one
            res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point
            #
            ok, point_res = self.ValidateHypoTest(
                res,
                min_toys=min_toys,
                max_toys=max_toys,
                contours=contours,
                signif=signif,
                cl=cl,
                output=self.args.output,
                verbose=verbose) if res is not None else (False, {
                    "ntoys": 0
                })

            print '>> Point %s [%i toys, %s]' % (
                name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

            if ok:
                complete_points += 1

            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res,
                                  'plot_' + name,
                                  opts=cfg['plot_settings'],
                                  poi_vals=(float(key[0]), float(key[1])))

            # Add the resulting CLs values to the output arrays. Normally just
            # for the model points that passed the validation criteria, but if "output_incomplete"
            # has been set to true then we'll write all model points where at least one HypoTestResult
            # is present
            if res is not None and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])

            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print '>>> Going to generate %i job(s) for point %s' % (
                    self.args.cycles, key)
                # Figure out the next seed numbers we need to run by finding the maximum seed number
                # so far
                done_cycles = val.keys()
                new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1
                new_cycles = range(new_idx, new_idx + self.args.cycles)

                print '>>> Done cycles: ' + ','.join(
                    str(x) for x in done_cycles)
                print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

                # Build the combine command. Here we'll take responsibility for setting the name and the
                # model parameters, making sure the latter are frozen
                set_arg = ','.join(
                    ['%s=%s,%s=%s' %
                     (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] +
                                      to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                    name, set_arg, freeze_arg)
                # Build a command for each job cycle setting the number of toys and random seed and passing through any other
                # user options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join([
                        'combine -M HybridNew', opts, point_args,
                        '-T %i' % toys_per_cycle,
                        '-s %i' % idx
                    ] + self.passthru)
                    self.job_queue.append(cmd)

        print ">> %i/%i points have completed and require no further toys" % (
            complete_points, total_points)
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
        if self.args.output:
            fout = ROOT.TFile(outfile, 'RECREATE')
            for c in contours:
                graph = ROOT.TGraph2D(len(output_data[c]),
                                      array('d',
                                            output_x), array('d', output_y),
                                      array('d', output_data[c]))
                graph.SetName(c)
                fout.WriteTObject(graph, c)
                # Also write a Graph with the CLsErr
                graph = ROOT.TGraph2D(len(output_clserr[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_clserr[c]))
                graph.SetName('clsErr_' + c)
                fout.WriteTObject(graph, 'clsErr_' + c)
                # And a Graph with the significance
                graph = ROOT.TGraph2D(len(output_signif[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_signif[c]))
                graph.SetName('signif_' + c)
                fout.WriteTObject(graph, 'signif_' + c)
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_ntoys))
            graph.SetName('ntoys')
            fout.WriteTObject(graph, 'ntoys')
            fout.Close()
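
The named-group regex is what ties each combine output file back to its model point and toy seed. Sketched standalone (the POI names and the file name are hypothetical):

    import re

    POIs = ['r_ggH', 'r_bbH']  # hypothetical POI names
    rgx = re.compile(
        r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
        % (POIs[0], POIs[1]))

    f = 'higgsCombine.r_ggH.1.5.r_bbH.2.0.HybridNew.mH120.12345.root'
    matches = rgx.search(f)
    if matches is not None:
        point = (matches.group('p1'), matches.group('p2'))
        seed = int(matches.group('toy'))
        print('%s %s' % (point, seed))  # ('1.5', '2.0') 12345
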
Example #4
    def run_method(self):
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # This is what the logic should be:
        #  - get the list of model points
        #  - figure out which files are:
        #    - completely missing
        #    - there but corrupt, missing tree
        #    - ok
        #  - If we have anything in the third category proceed to produce output files
        #  - Anything in the first two gets added to the queue only if
        #    --doFits is specified

        # Step 1 - open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)
        # to do - have to handle the case where it doesn't exist
        points = []
        blacklisted_points = []
        for igrid in cfg['grids']:
            assert (len(igrid) == 3)
            if igrid[2] == '':
                points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1])))
            else:
                blacklisted_points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1]),
                                      utils.split_vals(igrid[2])))
        POIs = cfg['POIs']
        opts = cfg['opts']

        # Have to merge some arguments from both the command line and the "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None: to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None: to_freeze.append(freeze_opt)
        if getattr(self.args, 'setPhysicsModelParameters', None) is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if getattr(self.args, 'freezeNuisances', None) is not None:
            to_freeze.append(self.args.freezeNuisances)

        file_dict = {}
        for p in points:
            file_dict[p] = []

        # Compile the regex once, outside the loop
        rgx = re.compile(
            r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.Asymptotic\.mH.*\.root'
            % (POIs[0], POIs[1]))
        for f in glob.glob('higgsCombine.%s.*.%s.*.Asymptotic.mH*.root' %
                           (POIs[0], POIs[1])):
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            if p in file_dict:
                file_dict[p].append(f)

        for key, val in file_dict.iteritems():
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
            print '>> Point %s' % name
            if len(val) == 0:
                print 'Going to run limit for point %s' % (key, )
                set_arg = ','.join(
                    ['%s=%s,%s=%s' %
                     (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] +
                                      to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                    name, set_arg, freeze_arg)
                cmd = ' '.join(['combine -M Asymptotic', opts, point_args] +
                               self.passthru)
                self.job_queue.append(cmd)

        bail_out = len(self.job_queue) > 0
        self.flush_queue()

        if bail_out:
            print '>> New jobs were created / run in this cycle, run the script again to collect the output'
            sys.exit(0)

        xvals = []
        yvals = []
        zvals_m2s = []
        zvals_m1s = []
        zvals_exp = []
        zvals_p1s = []
        zvals_p2s = []
        zvals_obs = []
        for key, val in file_dict.iteritems():
            for filename in val:
                fin = ROOT.TFile(filename)
                if fin.IsZombie(): continue
                tree = fin.Get('limit')
                for evt in tree:
                    if abs(evt.quantileExpected + 1) < 0.01:
                        xvals.append(float(key[0]))
                        yvals.append(float(key[1]))
                        #print 'At point %s have observed CLs = %f' % (key, evt.limit)
                        zvals_obs.append(float(evt.limit))
                    if abs(evt.quantileExpected - 0.025) < 0.01:
                        #print 'At point %s have -2sigma CLs = %f' % (key, evt.limit)
                        zvals_m2s.append(float(evt.limit))
                    if abs(evt.quantileExpected - 0.16) < 0.01:
                        #print 'At point %s have -1sigma CLs = %f' % (key, evt.limit)
                        zvals_m1s.append(float(evt.limit))
                    if abs(evt.quantileExpected - 0.5) < 0.01:
                        #print 'At point %s have expected CLs = %f' % (key, evt.limit)
                        zvals_exp.append(float(evt.limit))
                    if abs(evt.quantileExpected - 0.84) < 0.01:
                        #print 'At point %s have +1sigma CLs = %f' % (key, evt.limit)
                        zvals_p1s.append(float(evt.limit))
                    if abs(evt.quantileExpected - 0.975) < 0.01:
                        #print 'At point %s have +2sigma CLs = %f' % (key, evt.limit)
                        zvals_p2s.append(float(evt.limit))
        for POI1, POI2, CLs in blacklisted_points:
            xvals.append(float(POI1))
            yvals.append(float(POI2))
            zvals_m2s.append(float(CLs))
            zvals_m1s.append(float(CLs))
            zvals_exp.append(float(CLs))
            zvals_p1s.append(float(CLs))
            zvals_p2s.append(float(CLs))
            zvals_obs.append(float(CLs))
        graph_m2s = ROOT.TGraph2D(len(zvals_m2s), array('d', xvals),
                                  array('d', yvals), array('d', zvals_m2s))
        graph_m1s = ROOT.TGraph2D(len(zvals_m1s), array('d', xvals),
                                  array('d', yvals), array('d', zvals_m1s))
        graph_exp = ROOT.TGraph2D(len(zvals_exp), array('d', xvals),
                                  array('d', yvals), array('d', zvals_exp))
        graph_p1s = ROOT.TGraph2D(len(zvals_p1s), array('d', xvals),
                                  array('d', yvals), array('d', zvals_p1s))
        graph_p2s = ROOT.TGraph2D(len(zvals_p2s), array('d', xvals),
                                  array('d', yvals), array('d', zvals_p2s))
        graph_obs = ROOT.TGraph2D(len(zvals_obs), array('d', xvals),
                                  array('d', yvals), array('d', zvals_obs))
        fout = ROOT.TFile('asymptotic_grid.root', 'RECREATE')
        fout.WriteTObject(graph_m2s, 'exp-2')
        fout.WriteTObject(graph_m1s, 'exp-1')
        fout.WriteTObject(graph_exp, 'exp0')
        fout.WriteTObject(graph_p1s, 'exp+1')
        fout.WriteTObject(graph_p2s, 'exp+2')
        fout.WriteTObject(graph_obs, 'obs')
        fout.Close()
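
The chain of abs(evt.quantileExpected - q) < 0.01 comparisons above maps each entry of the 'limit' tree onto one of the six result bands. The same matching, written as a small lookup that needs no ROOT (the tolerance mirrors the one used above):

    # quantileExpected -> band name; -1 marks the observed limit
    QUANTILES = [(-1.0, 'obs'), (0.025, 'exp-2'), (0.16, 'exp-1'),
                 (0.5, 'exp0'), (0.84, 'exp+1'), (0.975, 'exp+2')]

    def classify_quantile(q, tol=0.01):
        """Return the band name for a quantileExpected value, or None."""
        for ref, name in QUANTILES:
            if abs(q - ref) < tol:
                return name
        return None

    print(classify_quantile(-1.0))   # 'obs'
    print(classify_quantile(0.975))  # 'exp+2'
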
Example #5
    def run_method(self):
        # Put the method back in because we always take it out
        self.put_back_arg('method', '-M')

        subbed_vars = {}

        if self.args.mass is not None:
            mass_vals = utils.split_vals(self.args.mass)
            subbed_vars[('MASS', )] = [(mval, ) for mval in mass_vals]
            self.passthru.extend(['-m', '%(MASS)s'])

        if self.args.singlePoint is not None:
            single_points = utils.split_vals(self.args.singlePoint)
            subbed_vars[('SINGLEPOINT', )] = [(pval, )
                                              for pval in single_points]
            self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
            self.args.name += '.POINT.%(SINGLEPOINT)s'

        if self.args.seed is not None:
            seed_vals = utils.split_vals(self.args.seed)
            subbed_vars[('SEED', )] = [(sval, ) for sval in seed_vals]
            self.passthru.extend(['-s', '%(SEED)s'])

        for i, generate in enumerate(self.args.generate):
            split_char = ':' if '::' in generate else ';'
            gen_header, gen_content = generate.split(split_char * 2)
            print gen_header
            print gen_content
            gen_headers = gen_header.split(split_char)
            gen_entries = gen_content.split(split_char)
            key = tuple()
            arglist = []
            for header in gen_headers:
                if header == 'n' or header == 'name':
                    self.args.name += '.%(GENNAME' + str(i) + ')s'
                    key += ('GENNAME' + str(i), )
                else:
                    self.passthru.extend(['%(' + header + ')s'])
                    key += (header, )
            for entry in gen_entries:
                if ',,' in entry:
                    split_entry = entry.split(',,')
                else:
                    split_entry = entry.split(',')
                final_arg = []
                for header, e in zip(gen_headers, split_entry):
                    argname = ('-' if len(header) == 1 else '--') + header
                    if header == 'n' or header == 'name':
                        final_arg.append(e)
                    elif len(e) and e != '!':
                        final_arg.append('%s %s' % (argname, e))
                    else:
                        final_arg.append('')
                arglist.append(tuple(final_arg))
            subbed_vars[key] = arglist

        if len(self.args.datacard) >= 1:
            # Two lists of tuples, one which does specify the mass, and one
            # which doesn't
            dc_mass = []
            dc_no_mass = []
            for dc in self.args.datacard:
                # Split workspace into path and filename
                path, file = os.path.split(dc)
                # If the wsp is in the current directory should call it '.'
                if path == '':
                    path = '.'
                # If we're not using the --there option then leave the
                # workspace argument as the full path
                if not self.args.there:
                    file = dc
                # Figure out if the enclosing directory is a mass value
                dirs = path.split('/')
                if self.args.mass is None and len(dirs) >= 1 and isfloat(
                        dirs[-1]):
                    print 'Assuming card %s uses mass value %s' % (dc,
                                                                   dirs[-1])
                    dc_mass.append((path, file, dirs[-1]))
                dc_no_mass.append((path, file))
            # If at least one mass value was inferred assume all of them are like this
            if len(dc_mass) > 0:
                subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass
                self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s'])
            else:
                subbed_vars[('DIR', 'DATACARD')] = dc_no_mass
                self.passthru.extend(['-d', '%(DATACARD)s'])
        # elif len(self.args.datacard) == 1:
        #     self.passthru.extend(['-d', self.args.datacard[0]])

        current_ranges = self.args.setPhysicsModelParameterRanges
        put_back_ranges = current_ranges is not None

        if self.args.boundlist is not None:
            # We definitely don't need to put the parameter ranges back
            # into the args because they're going in via the boundlist
            # option instead
            put_back_ranges = False
            with open(self.args.boundlist) as json_file:
                bnd = json.load(json_file)
            bound_pars = list(bnd.keys())
            print 'Found bounds for parameters %s' % ','.join(bound_pars)
            # Fill a dictionary of the bound info of the form:
            #  { 'PAR1' : [(MASS, LOWER, UPPER), ...], ...}
            bound_vals = {}
            for par in bound_pars:
                bound_vals[par] = list()
                for mass, bounds in bnd[par].iteritems():
                    bound_vals[par].append((float(mass), bounds[0], bounds[1]))
                bound_vals[par].sort(key=lambda x: x[0])
            # find the subbed_vars entry containing the mass
            # We will extend it to also specify the ranges
            dict_key = None
            mass_idx = None
            for key in subbed_vars.keys():
                if 'MASS' in key:
                    dict_key = key
                    mass_idx = dict_key.index('MASS')
            new_key = dict_key + ('MODELBOUND', )
            new_list = []
            for entry in subbed_vars[dict_key]:
                command = []
                if current_ranges is not None:
                    command.append(current_ranges)
                mval = entry[mass_idx]
                for par in bound_pars:
                    # The (mass, None, None) is just a trick to make bisect_left do the comparison
                    # with the list of tuples in bound_vals[par]. The +1E-5 is to avoid float
                    # rounding issues
                    lower_bound = bisect.bisect_left(
                        bound_vals[par], (float(mval) + 1E-5, None, None))
                    # If lower_bound == 0 this means we are at or below the lowest mass point,
                    # in which case we should increase by one to take the bounds from this lowest
                    # point
                    if lower_bound == 0:
                        lower_bound += 1
                    command.append('%s=%g,%g' %
                                   (par, bound_vals[par][lower_bound - 1][1],
                                    bound_vals[par][lower_bound - 1][2]))
                new_list.append(entry + (str(':'.join(command)), ))
            # now remove the current mass information from subbed_vars
            # and replace it with the updated one
            del subbed_vars[dict_key]
            subbed_vars[new_key] = new_list
            self.passthru.extend(
                ['--setPhysicsModelParameterRanges', '%(MODELBOUND)s'])

        # We might need to put the intercepted --setPhysicsModelParameterRanges arg back in
        if put_back_ranges:
            self.put_back_arg('setPhysicsModelParameterRanges',
                              '--setPhysicsModelParameterRanges')

        if self.args.points is not None:
            self.passthru.extend(['--points', self.args.points])
        if (self.args.split_points is not None and self.args.split_points > 0
                and self.args.points is not None):
            points = int(self.args.points)
            split = self.args.split_points
            start = 0
            ranges = []
            while (start + (split - 1)) < points:
                ranges.append((start, start + (split - 1)))
                start += split
            if start < points:
                ranges.append((start, points - 1))
            subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
            self.passthru.extend(
                ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
            self.args.name += '.POINTS.%(P_START)s.%(P_END)s'

        # can only put the name option back now because we might have modified
        # it from what the user specified
        self.put_back_arg('name', '-n')
        proto = 'combine ' + (' '.join(self.passthru))
        if self.args.there:
            proto = 'pushd %(DIR)s; combine ' + (' '.join(
                self.passthru)) + '; popd'

        keys = subbed_vars.keys()
        for it in itertools.product(*subbed_vars.values()):
            subs = {}
            for i, k in enumerate(keys):
                for tuple_i, tuple_ele in enumerate(k):
                    subs[tuple_ele] = it[i][tuple_i]
            self.job_queue.append(proto % subs)
        self.flush_queue()
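
The bisect_left lookup above is compact but subtle: the bounds for each parameter live in a mass-sorted list of (mass, lower, upper) tuples, and probing with (mass + 1e-5, None, None) selects the highest tabulated mass not above the requested one. A minimal sketch (the bounds table is made up):

    import bisect

    # Hypothetical bounds table for one parameter, sorted by mass
    bound_vals = [(100.0, 0.0, 20.0), (200.0, 0.0, 10.0), (300.0, 0.0, 5.0)]

    def bounds_for_mass(mval):
        # The probe tuple only ever compares on its first element: the
        # +1e-5 epsilon prevents exact ties with the tabulated masses
        idx = bisect.bisect_left(bound_vals, (float(mval) + 1e-5, None, None))
        if idx == 0:
            idx = 1  # at or below the lowest mass: use the first entry
        return bound_vals[idx - 1]

    print(bounds_for_mass(250))  # (200.0, 0.0, 10.0)
    print(bounds_for_mass(50))   # (100.0, 0.0, 20.0)
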
Example #6
    def run_method(self):
        # Put the method back in because we always take it out
        self.put_back_arg('method', '-M')

        subbed_vars = {}

        if self.args.mass is not None:
            mass_vals = utils.split_vals(self.args.mass)
            subbed_vars[('MASS', )] = [(mval, ) for mval in mass_vals]
            self.passthru.extend(['-m', '%(MASS)s'])

        if self.args.singlePoint is not None:
            single_points = utils.split_vals(self.args.singlePoint)
            subbed_vars[('SINGLEPOINT', )] = [(pval, )
                                              for pval in single_points]
            self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
            self.args.name += '.POINT.%(SINGLEPOINT)s'

        if self.args.seed is not None:
            seed_vals = utils.split_vals(self.args.seed)
            subbed_vars[('SEED', )] = [(sval, ) for sval in seed_vals]
            self.passthru.extend(['-s', '%(SEED)s'])

        if len(self.args.datacard) >= 1:
            # Two lists of tuples, one which does specify the mass, and one
            # which doesn't
            dc_mass = []
            dc_no_mass = []
            for dc in self.args.datacard:
                # Split workspace into path and filename
                path, file = os.path.split(dc)
                # If the wsp is in the current directory should call it '.'
                if path == '':
                    path = '.'
                # If we're not using the --there option then leave the
                # workspace argument as the full path
                if not self.args.there:
                    file = dc
                # Figure out if the enclosing directory is a mass value
                dirs = path.split('/')
                if self.args.mass is None and len(dirs) >= 1 and isfloat(
                        dirs[-1]):
                    print 'Assuming card %s uses mass value %s' % (dc,
                                                                   dirs[-1])
                    dc_mass.append((path, file, dirs[-1]))
                dc_no_mass.append((path, file))
            # If at least one mass value was inferred assume all of them are like this
            if len(dc_mass) > 0:
                subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass
                self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s'])
            else:
                subbed_vars[('DIR', 'DATACARD')] = dc_no_mass
                self.passthru.extend(['-d', '%(DATACARD)s'])
        # elif len(self.args.datacard) == 1:
        #     self.passthru.extend(['-d', self.args.datacard[0]])

        if self.args.boundlist is not None:
            with open(self.args.boundlist) as json_file:
                bnd = json.load(json_file)
            # find the subbed_vars entry containing the mass
            # We will extend it to also specify the ranges
            dict_key = None
            mass_idx = None
            for key in subbed_vars.keys():
                if 'MASS' in key:
                    dict_key = key
                    mass_idx = dict_key.index('MASS')
            new_key = dict_key + ('MODELBOUND', )
            new_list = []
            for entry in subbed_vars[dict_key]:
                command = []
                mval = entry[mass_idx]
                for model in bnd:
                    command.append(model + '=0,' + str(bnd[model][mval]))
                new_list.append(entry + (':'.join(command), ))
            # now remove the current mass information from subbed_vars
            # and replace it with the updated one
            del subbed_vars[dict_key]
            subbed_vars[new_key] = new_list
            self.passthru.extend(
                ['--setPhysicsModelParameterRanges', '%(MODELBOUND)s'])

        if self.args.points is not None:
            self.passthru.extend(['--points', self.args.points])
        if (self.args.split_points is not None and self.args.split_points > 0
                and self.args.points is not None):
            points = int(self.args.points)
            split = self.args.split_points
            start = 0
            ranges = []
            while (start + (split - 1)) < points:
                ranges.append((start, start + (split - 1)))
                start += split
            if start < points:
                ranges.append((start, points - 1))
            subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
            self.passthru.extend(
                ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
            self.args.name += '.POINTS.%(P_START)s.%(P_END)s'

        # can only put the name option back now because we might have modified
        # it from what the user specified
        self.put_back_arg('name', '-n')
        proto = 'combine ' + (' '.join(self.passthru))
        if self.args.there:
            proto = 'pushd %(DIR)s; combine ' + (' '.join(
                self.passthru)) + '; popd'

        keys = subbed_vars.keys()
        for it in itertools.product(*subbed_vars.values()):
            subs = {}
            for i, k in enumerate(keys):
                for tuple_i, tuple_ele in enumerate(k):
                    subs[tuple_ele] = it[i][tuple_i]
            self.job_queue.append(proto % subs)
        self.flush_queue()
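
The boundlist step works by locating whichever subbed_vars entry carries MASS and appending a MODELBOUND column to it, so every mass is paired with its parameter-range string. In isolation it looks like this (the datacard entries and bounds are illustrative):

    # Hypothetical starting point: masses already paired with datacards
    subbed_vars = {('DIR', 'DATACARD', 'MASS'): [('.', 'card.txt', '120'),
                                                 ('.', 'card.txt', '125')]}
    bnd = {'r_ggH': {'120': 30.0, '125': 25.0}}  # made-up bounds

    dict_key = None
    mass_idx = None
    for key in subbed_vars.keys():
        if 'MASS' in key:
            dict_key = key
            mass_idx = dict_key.index('MASS')

    new_list = []
    for entry in subbed_vars[dict_key]:
        mval = entry[mass_idx]
        command = [m + '=0,' + str(bnd[m][mval]) for m in bnd]
        new_list.append(entry + (':'.join(command),))

    del subbed_vars[dict_key]
    subbed_vars[dict_key + ('MODELBOUND',)] = new_list
    print(subbed_vars)
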
Example #7
    def run_method(self):
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids = cfg['grids']
        grids_to_remove = cfg.get('grids_to_remove', None)
        POIs = cfg['POIs']
        opts = cfg['opts']
        toys_per_cycle = cfg['toys_per_cycle']
        zipname = cfg.get('zipfile', None)
        statfile = cfg.get('statusfile', None)
        contours = cfg.get('contours',
                           ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys = cfg.get('min_toys', 500)
        max_toys = cfg.get('max_toys', 5000)
        signif = cfg.get('signif', 3.0)
        cl = cfg.get('CL', 0.95)
        verbose = cfg.get('verbose', False)
        make_plots = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass validation
        incomplete = cfg.get('output_incomplete', False)
        outfile = cfg.get('output', 'hybrid_grid.root')
        from_asymptotic_settings = cfg.get('from_asymptotic_settings', dict())
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None: to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None: to_freeze.append(freeze_opt)
        if getattr(self.args, 'setPhysicsModelParameters', None) is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if getattr(self.args, 'freezeNuisances', None) is not None:
            to_freeze.append(self.args.freezeNuisances)

        points = []
        blacklisted_points = []

        # For the automatic grid in "from_asymptotic" mode we should fix the format specifier
        # for the grid points, as the numerical precision of a given point may change once the
        # grid spacing is modified. By default we let split_vals do its thing however
        fmt_spec = None

        # In this mode we're doing a classic limit search vs MH instead of a 2D grid.
        # Most of the same code can be used however. First we'll use the json file containing the
        # asymptotic limits to create a new grid from scratch.
        if self.args.from_asymptotic is not None:
            grids = []
            bound_vals = None
            bound_pars = []
            fmt_spec = '%.5g'
            with open(self.args.from_asymptotic) as limit_json:
                limits = json.load(limit_json)
            for m in limits.keys():
                limit_vals = [x for x in limits[m].values()]
                max_limit = max(limit_vals)
                min_limit = min(limit_vals)
                # print (min_limit, max_limit)
                width = max_limit - min_limit
                max_limit += width * 0.3
                min_limit = max(0.0, min_limit - width * 0.3)
                nsteps = from_asymptotic_settings.get('points', 100)
                step_width = (max_limit - min_limit) / nsteps
                grids.append(
                    [m, '%g:%g|%g' % (min_limit, max_limit, step_width), ''])
                boundlist_file = from_asymptotic_settings.get('boundlist', '')
                if boundlist_file:
                    with open(boundlist_file) as json_file:
                        bnd = json.load(json_file)
                    bound_pars = list(bnd.keys())
                    print 'Found bounds for parameters %s' % ','.join(
                        bound_pars)
                    bound_vals = {}
                    for par in bound_pars:
                        bound_vals[par] = list()
                        for mass, bounds in bnd[par].iteritems():
                            bound_vals[par].append(
                                (float(mass), bounds[0], bounds[1]))
                        bound_vals[par].sort(key=lambda x: x[0])
                # print (min_limit, max_limit)
            # sys.exit(0)

        for igrid in grids:
            assert (len(igrid) == 3)
            if igrid[2] == '':
                points.extend(
                    itertools.product(
                        utils.split_vals(igrid[0], fmt_spec=fmt_spec),
                        utils.split_vals(igrid[1], fmt_spec=fmt_spec)))
            else:
                blacklisted_points.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1]),
                                      utils.split_vals(igrid[2])))

        # In between cycles of toys we may find there's something wrong with some
        # of the points in the grid and therefore want to remove them:
        points_to_remove = []
        if grids_to_remove is not None:
            for igrid in grids_to_remove:
                assert (len(igrid) == 2)
                points_to_remove.extend(
                    itertools.product(utils.split_vals(igrid[0]),
                                      utils.split_vals(igrid[1])))

        for p in points_to_remove:
            points.remove(p)

        # This dictionary will keep track of the combine output files for each model point
        file_dict = {}
        for p in points:
            file_dict[p] = {}

        # The regex we will use to identify output files and extract POI values
        rgx = re.compile(
            r'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root'
            % (POIs[0], POIs[1]))

        stats = {}
        if statfile and os.path.isfile(statfile):
            with open(statfile) as stat_json:
                stats = json.load(stat_json)

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory
        if zipname:
            # Open the zip file in append mode, this should also
            # create it if it doesn't exist
            zipf = zipfile.ZipFile(zipname, 'a')
            for f in zipf.namelist():
                matches = rgx.search(f)
                p = (matches.group('p1'), matches.group('p2'))
                seed = int(matches.group('toy'))
                if p in file_dict:
                    if seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on the seed number
                        # with a value pointing to the file in the archive in the format
                        # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname + '#' + f

        # Now look for files in the local directory
        for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' %
                           (POIs[0], POIs[1])):
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                # Don't add this file to the list if its seed number is already
                # a value in the dict.
                if seed not in file_dict[p]:
                    # If we're using the zipfile we'll add this now and
                    # then delete it from the local directory
                    # But only if the file is good: we don't want to pollute the zip
                    # file with incomplete or failed jobs
                    if zipname and plot.TFileIsGood(f):
                        zipf.write(f)  # assume this throws if it fails
                        print 'Adding %s to %s' % (f, zipname)
                        file_dict[p][seed] = zipname + '#' + f
                        os.remove(f)
                    else:  # otherwise just add the file to the dict in the normal way
                        file_dict[p][seed] = f

        if zipname:
            zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_data = {}
        output_ntoys = []
        output_clserr = {}
        output_signif = {}
        # One list of Z-values per contour
        for contour in contours:
            output_data[contour] = []
            output_clserr[contour] = []
            output_signif[contour] = []

        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key, val in file_dict.iteritems():
            status_changed = True
            total_points += 1
            status_key = ':'.join(key)
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])

            # First check if we use the status json
            all_files = val.values()
            status_files = []
            files = []

            if status_key in stats:
                status_files = stats[status_key]['files']
                if set(all_files) == set(status_files):
                    print 'For point %s, no files have been updated' % name
                    status_changed = False
                    files = all_files
                else:
                    files = [x for x in val.values() if plot.TFileIsGood(x)]
                    if set(files) == set(
                            status_files) and len(files) < len(all_files):
                        print 'For point %s, new files exist but they are not declared good' % name
                        status_changed = False
            else:
                files = [x for x in val.values() if plot.TFileIsGood(x)]

            # Merge the HypoTestResult objects from each file into one
            res = None
            precomputed = None
            if status_key in stats and not status_changed and stats[
                    status_key]["ntoys"] > 0:
                precomputed = stats[status_key]
            else:
                res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point
            #
            ok, point_res = self.ValidateHypoTest(res,
                                                  min_toys=min_toys,
                                                  max_toys=max_toys,
                                                  contours=contours,
                                                  signif=signif,
                                                  cl=cl,
                                                  output=self.args.output,
                                                  verbose=verbose,
                                                  precomputed=precomputed)

            print '>> Point %s [%i toys, %s]' % (
                name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

            stats[status_key] = {'files': files, 'ntoys': point_res['ntoys']}
            for cont in contours:
                if cont in point_res:
                    stats[status_key][cont] = point_res[cont]

            if ok:
                complete_points += 1

            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res,
                                  'plot_' + name,
                                  opts=cfg['plot_settings'],
                                  poi_vals=(float(key[0]), float(key[1])),
                                  point_info=point_res)

            # Add the resulting CLs values to the output arrays. Normally just
            # for the model points that passed the validation criteria, but if "output_incomplete"
            # has been set to true then we'll write all model points where at least one HypoTestResult
            # is present
            if (res is not None or precomputed
                    is not None) and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])

            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print '>>> Going to generate %i job(s) for point %s' % (
                    self.args.cycles, key)
                # Figure out the next seed numbers we need to run by finding the maximum seed number
                # so far
                done_cycles = val.keys()
                new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1
                new_cycles = range(new_idx, new_idx + self.args.cycles)

                print '>>> Done cycles: ' + ','.join(
                    str(x) for x in done_cycles)
                print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

                # Build the combine command. Here we'll take responsibility for setting the name and the
                # model parameters, making sure the latter are frozen
                set_arg = ','.join(
                    ['%s=%s,%s=%s' %
                     (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] +
                                      to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (
                    name, set_arg, freeze_arg)
                if self.args.from_asymptotic:
                    mval = key[0]
                    command = []
                    for par in bound_pars:
                        # The (mass, None, None) is just a trick to make bisect_left do the comparison
                        # with the list of tuples in bound_vals[par]. The +1E-5 is to avoid float
                        # rounding issues
                        lower_bound = bisect.bisect_left(
                            bound_vals[par], (float(mval) + 1E-5, None, None))
                        # If lower_bound == 0 this means we are at or below the lowest mass point,
                        # in which case we should increase by one to take the bounds from this lowest
                        # point
                        if lower_bound == 0:
                            lower_bound += 1
                        command.append(
                            '%s=%g,%g' %
                            (par, bound_vals[par][lower_bound - 1][1],
                             bound_vals[par][lower_bound - 1][2]))
                    if len(command) > 0:
                        point_args += (' --setPhysicsModelParameterRanges %s' %
                                       (':'.join(command)))
                    # print per_mass_point_args
                    point_args += ' --singlePoint %s' % key[1]
                    point_args += ' -m %s' % mval
                # Build a command for each job cycle setting the number of toys and random seed and passing through any other
                # user options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join([
                        'combine -M HybridNew', opts, point_args,
                        '-T %i' % toys_per_cycle,
                        '-s %i' % idx
                    ] + self.passthru)
                    self.job_queue.append(cmd)
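                # For example (all values hypothetical), with POIs ('mA', 'tanb'),
                # point ('300', '10'), toys_per_cycle = 500 and seed 4 this queues
                # something like:
                #   combine -M HybridNew <opts> -n .mA.300.tanb.10 \
                #     --setPhysicsModelParameters mA=300,tanb=10 \
                #     --freezeNuisances mA,tanb -T 500 -s 4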

        print ">> %i/%i points have completed and require no further toys" % (
            complete_points, total_points)
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
        if self.args.output and not self.args.from_asymptotic:
            fout = ROOT.TFile(outfile, 'RECREATE')
            for c in contours:
                graph = ROOT.TGraph2D(len(output_data[c]),
                                      array('d',
                                            output_x), array('d', output_y),
                                      array('d', output_data[c]))
                graph.SetName(c)
                fout.WriteTObject(graph, c)
                # Also write a Graph with the CLsErr
                graph = ROOT.TGraph2D(len(output_clserr[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_clserr[c]))
                graph.SetName('clsErr_' + c)
                fout.WriteTObject(graph, 'clsErr_' + c)
                # And a Graph with the significance
                graph = ROOT.TGraph2D(len(output_signif[c]),
                                      array('d', output_x),
                                      array('d', output_y),
                                      array('d', output_signif[c]))
                graph.SetName('signif_' + c)
                fout.WriteTObject(graph, 'signif_' + c)
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x),
                                  array('d', output_y),
                                  array('d', output_ntoys))
            graph.SetName('ntoys')
            fout.WriteTObject(graph, 'ntoys')
            fout.Close()

        if self.args.output and self.args.from_asymptotic:
            # Need to collect all the files for each mass point and hadd them:
            files_by_mass = {}
            for key, val in file_dict.iteritems():
                if key[0] not in files_by_mass:
                    files_by_mass[key[0]] = list()
                files_by_mass[key[0]].extend(val.values())
            for m, files in files_by_mass.iteritems():
                gridfile = 'higgsCombine.gridfile.%s.%s.%s.root' % (POIs[0], m,
                                                                    POIs[1])
                self.job_queue.append('hadd -f %s %s' %
                                      (gridfile, ' '.join(files)))
                for exp in ['', '0.025', '0.160', '0.500', '0.840', '0.975']:
                    self.job_queue.append(' '.join([
                        'combine -M HybridNew --rAbsAcc 0', opts,
                        '--grid %s' % gridfile,
                        '-n .final.%s.%s.%s' % (POIs[0], m, POIs[1]),
                        '-m %s' % (m), ('--expectedFromGrid %s' %
                                        exp) if exp else '--noUpdateGrid'
                    ] + self.passthru))
                self.flush_queue()
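                # Sketch of what gets queued per mass point (names hypothetical):
                # first 'hadd -f higgsCombine.gridfile.mA.300.tanb.root <files>',
                # then six extraction jobs: one for the observed limit
                # (--noUpdateGrid) and five with --expectedFromGrid at the
                # 0.025/0.160/0.500/0.840/0.975 quantiles for the expected band.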

        if statfile:
            with open(statfile, 'w') as stat_out:
                stat_json = json.dumps(stats,
                                       sort_keys=True,
                                       indent=2,
                                       separators=(',', ': '))
                stat_out.write(stat_json)
Example #8
  def run_method(self):
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch(ROOT.kTRUE)

    # This is what the logic should be:
    #  - get the list of model points
    #  - figure out which files are:
    #    - completely missing
    #    - there but corrupt, missing tree
    #    - ok
    #  - If we have anything in the third category proceed to produce output files
    #  - Anything in the first two gets added to the queue only if --doFits is specified

    # Step 1 - open the json config file
    with open(self.args.config) as json_file:
        cfg = json.load(json_file)
    # TODO: handle the case where the config file doesn't exist
    points = []; blacklisted_points = []
    for igrid in cfg['grids']:
      assert(len(igrid) == 3)
      if igrid[2]=='' : points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1])))
      else : blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2])))
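    # Example grid entry (hypothetical), assuming utils.split_vals expands a
    # 'lo:hi|step' range: ['120:130|5', '1:2|1', ''] yields the six points
    # ('120', '1') ... ('130', '2'), while a non-empty third element instead
    # blacklists those points with a fixed CLs value.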
    POIs = cfg['POIs']
    opts = cfg['opts']

    # Have to merge some arguments from both the command line and the "opts" in the json file
    to_freeze = []
    to_set = []
    set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
    if set_opt is not None: to_set.append(set_opt)
    freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
    if freeze_opt is not None: to_freeze.append(freeze_opt)
    if hasattr(self.args, 'setPhysicsModelParameters') and self.args.setPhysicsModelParameters is not None:
        to_set.append(self.args.setPhysicsModelParameters)
    if hasattr(self.args, 'freezeNuisances') and self.args.freezeNuisances is not None:
        to_freeze.append(self.args.freezeNuisances)

    file_dict = { }
    for p in points:
      file_dict[p] = []

    for f in glob.glob('higgsCombine.%s.*.%s.*.Asymptotic.mH*.root' % (POIs[0], POIs[1])):
      # print f
      rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.Asymptotic\.mH.*\.root' % (POIs[0], POIs[1]))
      matches = rgx.search(f)
      p = (matches.group('p1'), matches.group('p2'))
      if p in file_dict:
        file_dict[p].append(f)
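    # E.g. with hypothetical POIs ('mA', 'tanb'), a file named
    # 'higgsCombine.mA.300.tanb.10.Asymptotic.mH120.root' matches with
    # p1 = '300', p2 = '10' and is filed under the point ('300', '10').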

    for key,val in file_dict.iteritems():
      name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
      print '>> Point %s' % name
      if len(val) == 0:
        print 'Going to run limit for point %s' % (key,)
        set_arg = ','.join(['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
        freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
        point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (name, set_arg, freeze_arg)
        cmd = ' '.join(['combine -M Asymptotic', opts, point_args] + self.passthru)
        self.job_queue.append(cmd)

    bail_out = len(self.job_queue) > 0
    self.flush_queue()

    if bail_out:
        print '>> New jobs were created / run in this cycle, run the script again to collect the output'
        sys.exit(0)

    xvals = []
    yvals = []
    zvals_m2s = []; zvals_m1s = []; zvals_exp = []; zvals_p1s = []; zvals_p2s = []; zvals_obs = []
    for key,val in file_dict.iteritems():
      for filename in val:
        fin = ROOT.TFile(filename)
        if fin.IsZombie(): continue
        tree = fin.Get('limit')
        for evt in tree:
          if abs(evt.quantileExpected+1)<0.01:
            xvals.append(float(key[0]))
            yvals.append(float(key[1]))
            #print 'At point %s have observed CLs = %f' % (key, evt.limit)
            zvals_obs.append(float(evt.limit))
          if abs(evt.quantileExpected-0.025)<0.01:
            #print 'At point %s have -2sigma CLs = %f' % (key, evt.limit)
            zvals_m2s.append(float(evt.limit))
          if abs(evt.quantileExpected-0.16)<0.01:
            #print 'At point %s have -1sigma CLs = %f' % (key, evt.limit)
            zvals_m1s.append(float(evt.limit))
          if abs(evt.quantileExpected-0.5)<0.01:
            #print 'At point %s have expected CLs = %f' % (key, evt.limit)
            zvals_exp.append(float(evt.limit))
          if abs(evt.quantileExpected-0.84)<0.01:
            #print 'At point %s have +1sigma CLs = %f' % (key, evt.limit)
            zvals_p1s.append(float(evt.limit))
          if abs(evt.quantileExpected-0.975)<0.01:
            #print 'At point %s have +2sigma CLs = %f' % (key, evt.limit)
            zvals_p2s.append(float(evt.limit))
    for POI1, POI2, CLs in blacklisted_points:
      xvals.append(float(POI1))
      yvals.append(float(POI2))
      zvals_m2s.append(float(CLs))
      zvals_m1s.append(float(CLs))
      zvals_exp.append(float(CLs))
      zvals_p1s.append(float(CLs))
      zvals_p2s.append(float(CLs))
      zvals_obs.append(float(CLs))
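    # A blacklist entry like ('300', '10', '0.5') (hypothetical) therefore pins
    # every contour, observed included, to CLs = 0.5 at the point (300, 10).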
    graph_m2s = ROOT.TGraph2D(len(zvals_m2s), array('d', xvals), array('d', yvals), array('d', zvals_m2s))
    graph_m1s = ROOT.TGraph2D(len(zvals_m1s), array('d', xvals), array('d', yvals), array('d', zvals_m1s))
    graph_exp = ROOT.TGraph2D(len(zvals_exp), array('d', xvals), array('d', yvals), array('d', zvals_exp))
    graph_p1s = ROOT.TGraph2D(len(zvals_p1s), array('d', xvals), array('d', yvals), array('d', zvals_p1s))
    graph_p2s = ROOT.TGraph2D(len(zvals_p2s), array('d', xvals), array('d', yvals), array('d', zvals_p2s))
    graph_obs = ROOT.TGraph2D(len(zvals_obs), array('d', xvals), array('d', yvals), array('d', zvals_obs))
    #h_bins = cfg['hist_binning']
    #hist = ROOT.TH2F('h_observed', '', h_bins[0], h_bins[1], h_bins[2], h_bins[3], h_bins[4], h_bins[5])
    #for i in xrange(1, hist.GetNbinsX()+1):
    #  for j in xrange(1, hist.GetNbinsY()+1):
    #    hist.SetBinContent(i, j, graph.Interpolate(hist.GetXaxis().GetBinCenter(i), hist.GetYaxis().GetBinCenter(j)))
    fout = ROOT.TFile('asymptotic_grid.root', 'RECREATE')
    fout.WriteTObject(graph_m2s, 'exp-2')
    fout.WriteTObject(graph_m1s, 'exp-1')
    fout.WriteTObject(graph_exp, 'exp0')
    fout.WriteTObject(graph_p1s, 'exp+1')
    fout.WriteTObject(graph_p2s, 'exp+2')
    fout.WriteTObject(graph_obs, 'obs')
    #fout.WriteTObject(hist)
    fout.Close()
Example #9
    def run_method(self):
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids           = cfg['grids']
        grids_to_remove = cfg.get('grids_to_remove', None)
        POIs            = cfg['POIs']
        opts            = cfg['opts']
        toys_per_cycle  = cfg['toys_per_cycle']
        zipname         = cfg.get('zipfile',    None)
        statfile        = cfg.get('statusfile', None)
        contours        = cfg.get('contours',   ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys        = cfg.get('min_toys',   500)
        max_toys        = cfg.get('max_toys',   5000)
        signif          = cfg.get('signif',     3.0)
        cl              = cfg.get('CL',         0.95)
        verbose         = cfg.get('verbose',    False)
        make_plots      = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass validation
        incomplete      = cfg.get('output_incomplete', False)
        outfile         = cfg.get('output','hybrid_grid.root')
        from_asymptotic_settings = cfg.get('from_asymptotic_settings', dict())
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None: to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None: to_freeze.append(freeze_opt)
        if hasattr(self.args, 'setPhysicsModelParameters') and self.args.setPhysicsModelParameters is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if hasattr(self.args, 'freezeNuisances') and self.args.freezeNuisances is not None:
            to_freeze.append(self.args.freezeNuisances)

        points = []
        blacklisted_points = []

        # For the automatic grid in "from_asymptotic" mode we should fix the format specifier for
        # the grid points, as the numerical precision of a given point may change once the grid
        # spacing is modified. By default we let split_vals do its thing, however.
        fmt_spec = None

        # In this mode we're doing a classic limit search vs MH instead of a 2D grid.
        # Most of the same code can be used however. First we'll use the json file containing the
        # asymptotic limits to create a new grid from scratch.
        if self.args.from_asymptotic is not None:
            grids = []
            bound_vals = None
            bound_pars = []
            fmt_spec = '%.5g'
            with open(self.args.from_asymptotic) as limit_json:
                limits = json.load(limit_json)
            for m in limits.keys():
                limit_vals = [x for x in limits[m].values()]
                max_limit = max(limit_vals)
                min_limit = min(limit_vals)
                # print (min_limit, max_limit)
                width = max_limit - min_limit
                max_limit += width * 0.3
                min_limit = max(0.0, min_limit - width * 0.3)
                nsteps = from_asymptotic_settings.get('points', 100)
                step_width = (max_limit - min_limit) / nsteps
                grids.append([m, '%g:%g|%g' % (min_limit, max_limit, step_width), ''])
                boundlist_file = from_asymptotic_settings.get('boundlist', '')
                if boundlist_file:
                    with open(boundlist_file) as json_file:
                        bnd = json.load(json_file)
                    bound_pars = list(bnd.keys())
                    print 'Found bounds for parameters %s' % ','.join(bound_pars)
                    bound_vals = {}
                    for par in bound_pars:
                        bound_vals[par] = list()
                        for mass, bounds in bnd[par].iteritems():
                            bound_vals[par].append((float(mass), bounds[0], bounds[1]))
                        bound_vals[par].sort(key=lambda x: x[0])
                # print (min_limit, max_limit)
            # sys.exit(0)
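            # Numeric sketch (made-up limits): if limits['300'] spans
            # min_limit = 1.0 to max_limit = 2.0 the width is 1.0, so the scan
            # window becomes [0.7, 2.3]; with the default 100 points the step
            # is 0.016 and the grid entry is ['300', '0.7:2.3|0.016', ''].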

        for igrid in grids:
            assert(len(igrid) == 3)
            if igrid[2] == '':
                points.extend(itertools.product(utils.split_vals(igrid[0], fmt_spec=fmt_spec), utils.split_vals(igrid[1], fmt_spec=fmt_spec)))
            else:
                blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2])))

        #In between cycles of toys we may find there's something wrong with some of the points in the grid and therefore want to remove them:
        points_to_remove = []
        if grids_to_remove is not None:
            for igrid in grids_to_remove:
                assert(len(igrid) == 2)
                points_to_remove.extend(itertools.product(utils.split_vals(igrid[0]),utils.split_vals(igrid[1])))

        for p in points_to_remove:
            points.remove(p)

        # This dictionary will keep track of the combine output files for each model point
        file_dict = { }
        for p in points:
            file_dict[p] = {}

        # The regex we will use to identify output files and extract POI values
        rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root' % (POIs[0], POIs[1]))

        stats = {}
        if statfile and os.path.isfile(statfile):
            with open(statfile) as stat_json:
                stats = json.load(stat_json)

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory
        if zipname:
            # Open the zip file in append mode, this should also
            # create it if it doesn't exist
            zipf = zipfile.ZipFile(zipname, 'a')
            for f in zipf.namelist():
                matches = rgx.search(f)
                p = (matches.group('p1'), matches.group('p2'))
                seed = int(matches.group('toy'))
                if p in file_dict:
                    if seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on the seed number
                        # with a value pointing to the file in the archive in the format
                        # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname+'#'+f

        # Now look for files in the local directory
        for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' % (POIs[0], POIs[1])):
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                # Don't add this file to the list if its seed number is already
                # a value in the dict.
                if seed not in file_dict[p]:
                    # If we're using the zipfile we'll add this now and
                    # then delete it from the local directory
                    # But: only if the file is good; we don't want to pollute the zip
                    # file with incomplete or failed jobs
                    if zipname and plot.TFileIsGood(f):
                        zipf.write(f) # assume this throws if it fails
                        print 'Adding %s to %s' % (f, zipname)
                        file_dict[p][seed] = zipname+'#'+f
                        os.remove(f)
                    else:  # otherwise just add the file to the dict in the normal way
                        file_dict[p][seed] = f

        if zipname:
            zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_data = {}
        output_ntoys = []
        output_clserr = {}
        output_signif = {}
        # One list of Z-values per contour
        for contour in contours:
            output_data[contour] = []
            output_clserr[contour] = []
            output_signif[contour] = []


        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key,val in file_dict.iteritems():
            status_changed = True
            total_points += 1
            status_key = ':'.join(key)
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
            
            # First check if we use the status json
            all_files = val.values()
            status_files = []
            files = [x for x in val.values() if plot.TFileIsGood(x)]

            if status_key in stats:
                status_files = stats[status_key]['files']
                if set(all_files) == set(status_files):
                    print 'For point %s, no files have been updated' % name
                    status_changed = False
                if set(files) == set(status_files) and len(files) < len(all_files):
                    print 'For point %s, new files exist but they are not declared good' % name
                    status_changed = False

            # Merge the HypoTestResult objects from each file into one
            res = None
            precomputed = None
            if status_key in stats and not status_changed:
                precomputed = stats[status_key]
            else:
                res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point
            #
            ok, point_res = self.ValidateHypoTest(res,
                min_toys = min_toys,
                max_toys = max_toys,
                contours = contours,
                signif   = signif,
                cl       = cl,
                output   = self.args.output,
                verbose  = verbose,
                precomputed = precomputed)

            print '>> Point %s [%i toys, %s]' % (name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

            stats[status_key] = {
                'files': files,
                'ntoys': point_res['ntoys']
            }
            for cont in contours:
                if cont in point_res:
                    stats[status_key][cont] = point_res[cont]

            if ok:
                complete_points += 1

            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res, 'plot_'+name, opts = cfg['plot_settings'], poi_vals = (float(key[0]), float(key[1])), point_info=point_res)

            # Add the resulting CLs values to the output arrays. Normally just
            # for the model points that passed the validation criteria, but if "output_incomplete"
            # has been set to true then we'll write all model points where at least one HypoTestResult
            # is present
            if (res is not None or precomputed is not None) and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])

            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print '>>> Going to generate %i job(s) for point %s' % (self.args.cycles, key)
                # Figure out the next seed numbers we need to run by finding the maximum seed number
                # so far
                done_cycles = val.keys()
                new_idx = max(done_cycles)+1 if len(done_cycles) > 0 else 1
                new_cycles = range(new_idx, new_idx+self.args.cycles)

                print '>>> Done cycles: ' + ','.join(str(x) for x in done_cycles)
                print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

                # Build the combine command. Here we'll take responsibility for setting the name and the
                # model parameters, making sure the latter are frozen
                set_arg = ','.join(['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (name, set_arg, freeze_arg)
                if self.args.from_asymptotic:
                    mval = key[0]
                    command = []
                    for par in bound_pars:
                        # The (mass, None, None) is just a trick to make bisect_left do the comparison
                        # with the list of tuples in bound_vals[par]. The +1E-5 is to avoid float rounding
                        # issues
                        lower_bound = bisect.bisect_left(bound_vals[par], (float(mval)+1E-5, None, None))
                        # If lower_bound == 0 this means we are at or below the lowest mass point,
                        # in which case we should increase by one to take the bounds from this lowest
                        # point
                        if lower_bound == 0:
                            lower_bound += 1
                        command.append('%s=%g,%g' % (par, bound_vals[par][lower_bound-1][1], bound_vals[par][lower_bound-1][2]))
                    if len(command) > 0:
                        point_args += (' --setPhysicsModelParameterRanges %s' % (':'.join(command)))
                    # print per_mass_point_args
                    point_args += ' --singlePoint %s' % key[1]
                    point_args += ' -m %s' % mval
                # Build a command for each job cycle setting the number of toys and random seed and passing through any other
                # user options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join(['combine -M HybridNew', opts, point_args, '-T %i' % toys_per_cycle, '-s %i' % idx] + self.passthru)
                    self.job_queue.append(cmd)

        print ">> %i/%i points have completed and require no further toys" % (complete_points, total_points)
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
        if self.args.output and not self.args.from_asymptotic:
            fout = ROOT.TFile(outfile, 'RECREATE')
            for c in contours:
                graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x), array('d', output_y), array('d', output_data[c]))
                graph.SetName(c)
                fout.WriteTObject(graph, c)
                # Also write a Graph with the CLsErr
                graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x), array('d', output_y), array('d', output_clserr[c]))
                graph.SetName('clsErr_'+c)
                fout.WriteTObject(graph, 'clsErr_'+c)
                # And a Graph with the significance
                graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x), array('d', output_y), array('d', output_signif[c]))
                graph.SetName('signif_'+c)
                fout.WriteTObject(graph, 'signif_'+c)
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x), array('d', output_y), array('d', output_ntoys))
            graph.SetName('ntoys')
            fout.WriteTObject(graph, 'ntoys')
            fout.Close()

        if self.args.output and self.args.from_asymptotic:
            # Need to collect all the files for each mass point and hadd them:
            files_by_mass = {}
            for key,val in file_dict.iteritems():
                if key[0] not in files_by_mass:
                    files_by_mass[key[0]] = list()
                files_by_mass[key[0]].extend(val.values())
            for m, files in files_by_mass.iteritems():
                gridfile = 'higgsCombine.gridfile.%s.%s.%s.root' % (POIs[0], m, POIs[1])
                self.job_queue.append('hadd -f %s %s' % (gridfile, ' '.join(files)))
                for exp in ['', '0.025', '0.160', '0.500', '0.840', '0.975']:
                    self.job_queue.append(' '.join([
                            'combine -M HybridNew --rAbsAcc 0',
                            opts,
                            '--grid %s' % gridfile,
                            '-n .final.%s.%s.%s' % (POIs[0], m, POIs[1]),
                            '-m %s' % (m),
                            ('--expectedFromGrid %s' % exp) if exp else '--noUpdateGrid'
                        ] + self.passthru))
                self.flush_queue()

        if statfile:
            with open(statfile, 'w') as stat_out:
                stat_json = json.dumps(
                    stats, sort_keys=True, indent=2, separators=(',', ': '))
                stat_out.write(stat_json)
Example #10
    def run_method(self):
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch(ROOT.kTRUE)

        # Open the json config file
        with open(self.args.config) as json_file:
            cfg = json.load(json_file)

        # Set all the parameter values locally using defaults if necessary
        grids           = cfg['grids']
        POIs            = cfg['POIs']
        opts            = cfg['opts']
        toys_per_cycle  = cfg['toys_per_cycle']
        zipname         = cfg.get('zipfile',    None)
        contours        = cfg.get('contours',   ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
        min_toys        = cfg.get('min_toys',   500)
        max_toys        = cfg.get('max_toys',   5000)
        signif          = cfg.get('signif',     3.0)
        cl              = cfg.get('CL',         0.95)
        verbose         = cfg.get('verbose',    False)
        make_plots      = cfg.get('make_plots', False)
        # Write CLs values into the output even if current toys do not pass validation
        incomplete      = cfg.get('output_incomplete', False)
        outfile         = cfg.get('output','hybrid_grid.root')
        # NB: blacklisting not yet implemented for this method

        # Have to merge some arguments from both the command line and the "opts" in the json file
        to_freeze = []
        to_set = []
        set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
        if set_opt is not None: to_set.append(set_opt)
        freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
        if freeze_opt is not None: to_freeze.append(freeze_opt)
        if hasattr(self.args, 'setPhysicsModelParameters') and self.args.setPhysicsModelParameters is not None:
            to_set.append(self.args.setPhysicsModelParameters)
        if hasattr(self.args, 'freezeNuisances') and self.args.freezeNuisances is not None:
            to_freeze.append(self.args.freezeNuisances)

        points = []; blacklisted_points = []
        for igrid in grids:
            assert(len(igrid) == 3)
            if igrid[2] == '':
                points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1])))
            else:
                blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2])))

        # This dictionary will keep track of the combine output files for each model point
        file_dict = { }
        for p in points:
            file_dict[p] = {}

        # The regex we will use to identify output files and extract POI values
        rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root' % (POIs[0], POIs[1]))

        # Can optionally copy output root files into a zip archive
        # If the user has specified a zipfile we will first
        # look for output files in this archive before scanning the
        # current directory
        if zipname:
            # Open the zip file in append mode, this should also
            # create it if it doesn't exist
            zipf = zipfile.ZipFile(zipname, 'a')
            for f in zipf.namelist():
                matches = rgx.search(f)
                p = (matches.group('p1'), matches.group('p2'))
                seed = int(matches.group('toy'))
                if p in file_dict:
                    if seed not in file_dict[p]:
                        # For each model point have a dictionary keyed on the seed number
                        # with a value pointing to the file in the archive in the format
                        # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                        file_dict[p][seed] = zipname+'#'+f

        # Now look for files in the local directory
        for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' % (POIs[0], POIs[1])):
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                # Don't add this file to the list if its seed number is already
                # a value in the dict.
                if seed not in file_dict[p]:
                    # If we're using the zipfile we'll add this now and
                    # then delete it from the local directory
                    # But: only if the file is good; we don't want to pollute the zip
                    # file with incomplete or failed jobs
                    if zipname and plot.TFileIsGood(f):
                        zipf.write(f) # assume this throws if it fails
                        print 'Adding %s to %s' % (f, zipname)
                        file_dict[p][seed] = zipname+'#'+f
                        os.remove(f)
                    else:  # otherwise just add the file to the dict in the normal way
                        file_dict[p][seed] = f

        if zipname:
            zipf.close()

        # These lists will keep track of the CLs values which we will use
        # to create the output TGraph2Ds
        output_x = []
        output_y = []
        output_data = {}
        output_ntoys = []
        output_clserr = {}
        output_signif = {}
        # One list of Z-values per contour 
        for contour in contours:
            output_data[contour] = []
            output_clserr[contour] = []
            output_signif[contour] = []


        # Also keep track of the number of model points which have met the
        # CLs criteria
        total_points = 0
        complete_points = 0

        for key,val in file_dict.iteritems():
            total_points += 1
            name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
            files = [x for x in val.values() if plot.TFileIsGood(x)]
            # Merge the HypoTestResult objects from each file into one
            res = self.GetCombinedHypoTest(files)

            # Do the validation of this model point
            # 
            ok, point_res = self.ValidateHypoTest(res,
                min_toys = min_toys,
                max_toys = max_toys,
                contours = contours,
                signif   = signif,
                cl       = cl,
                output   = self.args.output,
                verbose  = verbose) if res is not None else (False, {"ntoys" : 0})

            print '>> Point %s [%i toys, %s]' % (name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')
            
            if ok:
                complete_points += 1
            
            # Make plots of the test statistic distributions if requested
            if res is not None and make_plots:
                self.PlotTestStat(res, 'plot_'+name, opts = cfg['plot_settings'], poi_vals = (float(key[0]), float(key[1])))
  
            # Add the resulting CLs values to the output arrays. Normally just
            # for the model points that passed the validation criteria, but if "output_incomplete"
            # has been set to true then we'll write all model points where at least one HypoTestResult
            # is present
            if res is not None and (ok or incomplete) and self.args.output:
                output_x.append(float(key[0]))
                output_y.append(float(key[1]))
                output_ntoys.append(point_res['ntoys'])
                for contour in contours:
                    output_data[contour].append(point_res[contour][0])
                    output_clserr[contour].append(point_res[contour][1])
                    output_signif[contour].append(point_res[contour][2])
            
            # Do the job cycle generation if requested
            if not ok and self.args.cycles > 0:
                print '>>> Going to generate %i job(s) for point %s' % (self.args.cycles, key)
                # Figure out the next seed numbers we need to run by finding the maximum seed number
                # so far
                done_cycles = val.keys()
                new_idx = max(done_cycles)+1 if len(done_cycles) > 0 else 1
                new_cycles = range(new_idx, new_idx+self.args.cycles)
                
                print '>>> Done cycles: ' + ','.join(str(x) for x in done_cycles)
                print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)
                
                # Build the combine command. Here we'll take responsibility for setting the name and the
                # model parameters, making sure the latter are frozen
                set_arg = ','.join(['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
                freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
                point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (name, set_arg, freeze_arg)
                # Build a command for each job cycle setting the number of toys and random seed and passing through any other
                # user options from the config file or the command line
                for idx in new_cycles:
                    cmd = ' '.join(['combine -M HybridNew', opts, point_args, '-T %i' % toys_per_cycle, '-s %i' % idx] + self.passthru)
                    self.job_queue.append(cmd)

        print ">> %i/%i points have completed and require no further toys" % (complete_points, total_points)
        self.flush_queue()

        # Create and write output CLs TGraph2Ds here
        # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
        if self.args.output:
            fout = ROOT.TFile(outfile, 'RECREATE')
            for c in contours:
                graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x), array('d', output_y), array('d', output_data[c]))
                graph.SetName(c)
                fout.WriteTObject(graph, c)
                # Also write a Graph with the CLsErr
                graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x), array('d', output_y), array('d', output_clserr[c]))
                graph.SetName('clsErr_'+c)
                fout.WriteTObject(graph, 'clsErr_'+c)
                # And a Graph with the significance
                graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x), array('d', output_y), array('d', output_signif[c]))
                graph.SetName('signif_'+c)
                fout.WriteTObject(graph, 'signif_'+c)
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x), array('d', output_y), array('d', output_ntoys))
            graph.SetName('ntoys')
            fout.WriteTObject(graph, 'ntoys')
            fout.Close()
Example #11
    def run_method(self):
        # Put the method back in because we always take it out
        self.put_back_arg('method', '-M')

        # cmd_queue = []
        subbed_vars = {}

        # pre_cmd = ''

        if self.args.mass is not None:
            mass_vals = utils.split_vals(self.args.mass)
            subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
            self.passthru.extend(['-m', '%(MASS)s'])

        if self.args.singlePoint is not None:
            single_points = utils.split_vals(self.args.singlePoint)
            subbed_vars[('SINGLEPOINT',)] = [(pval,) for pval in single_points]
            self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
            self.args.name += '.POINT.%(SINGLEPOINT)s'

        if self.args.seed is not None:
            seed_vals = utils.split_vals(self.args.seed)
            subbed_vars[('SEED',)] = [(sval,) for sval in seed_vals]
            self.passthru.extend(['-s', '%(SEED)s'])

        if len(self.args.datacard) >= 1:
            # Two lists of tuples, one which does specify the mass, and one
            # which doesn't
            dc_mass = []
            dc_no_mass = []
            for dc in self.args.datacard:
                # Split workspace into path and filename
                path, file = os.path.split(dc)
                # If the wsp is in the current directory we should call it '.'
                if path == '':
                    path = '.'
                # If we're not using the --there option then leave the
                # workspace argument as the full path
                if not self.args.there:
                    file = dc
                # Figure out if the enclosing directory is a mass value
                dirs = path.split('/')
                if self.args.mass is None and len(dirs) >= 1 and isfloat(dirs[-1]):
                    print 'Assuming card %s uses mass value %s' % (dc, dirs[-1])
                    dc_mass.append((path, file, dirs[-1]))
                dc_no_mass.append((path, file))
            # If at least one mass value was inferred assume all of them are like this
            if len(dc_mass) > 0:
                subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass
                self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s'])
            else:
                subbed_vars[('DIR', 'DATACARD',)] = dc_no_mass
                self.passthru.extend(['-d', '%(DATACARD)s'])
        # elif len(self.args.datacard) == 1:
        #     self.passthru.extend(['-d', self.args.datacard[0]])

        if self.args.boundlist is not None:
            with open(self.args.boundlist) as json_file:
                bnd = json.load(json_file)
            # find the subbed_vars entry containing the mass
            # We will extend it to also specify the ranges
            dict_key = None
            mass_idx = None
            for key in subbed_vars.keys():
                if 'MASS' in key:
                    dict_key = key
                    mass_idx = dict_key.index('MASS')
            new_key = dict_key + ('MODELBOUND',)
            new_list = []
            for entry in subbed_vars[dict_key]:
                command = []
                mval = entry[mass_idx]
                for model in bnd:
                    command.append(model+'=0,'+str(bnd[model][mval]))
                new_list.append(entry + (':'.join(command),))
            # now remove the current mass information from subbed_vars
            # and replace it with the updated one
            del subbed_vars[dict_key]
            subbed_vars[new_key] = new_list
            self.passthru.extend(['--setPhysicsModelParameterRanges',  '%(MODELBOUND)s'])
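            # Sketch with a made-up boundlist {"r": {"120": 20, "130": 40}}: a
            # mass entry ('120',) becomes ('120', 'r=0,20') and the command
            # gains --setPhysicsModelParameterRanges r=0,20. Multiple
            # parameters would be joined with ':', e.g. 'r=0,20:x=0,5'.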

        if self.args.points is not None:
            self.passthru.extend(['--points', self.args.points])
        if (self.args.split_points is not None and
                self.args.split_points > 0 and
                self.args.points is not None):
            points = int(self.args.points)
            split = self.args.split_points
            start = 0
            ranges = []
            while (start + (split - 1)) < points:
            #    filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root"
            #    if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            #        # Send job, if the file it's supposed to create doesn't exist yet
            #        # or if the file is empty because the previous job didn't finish
                ranges.append((start, start + (split - 1)))
                start += split
            if start < points:
            #    filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root"
            #    if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
                ranges.append((start, points - 1))
            #if (ranges == []):
            #    print "No jobs were created; All files already exist"
            #    exit()
            subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
            self.passthru.extend(
                ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
            self.args.name += '.POINTS.%(P_START)s.%(P_END)s'
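            # Worked example: with --points 100 and split-points 40 the ranges
            # are (0, 39), (40, 79) and (80, 99), i.e. three jobs with
            # --firstPoint/--lastPoint set accordingly and names ending in
            # .POINTS.0.39, .POINTS.40.79 and .POINTS.80.99.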

        # can only put the name option back now because we might have modified
        # it from what the user specified
        self.put_back_arg('name', '-n')
        proto = 'combine ' + (' '.join(self.passthru))
        if self.args.there:
            proto = 'pushd %(DIR)s; combine ' + (' '.join(self.passthru))+'; popd'

        for it in itertools.product(*subbed_vars.values()):
            keys = subbed_vars.keys()
            subs = {}
            for i, k in enumerate(keys):
                for tuple_i, tuple_ele in enumerate(k):
                    subs[tuple_ele] = it[i][tuple_i]
            self.job_queue.append(proto % subs)
        self.flush_queue()
Example #12
  def run_method(self):
    # This is what the logic should be:
    #  - get the list of model points
    #  - figure out which files are:
    #    - completely missing
    #    - there but corrupt, missing tree
    #    - ok
    #  - If we have anything in the third category proceed to produce output files
    #  - Anything in the first two gets added to the queue only if --doFits is specified


    # Step 1 - open the json config file
    with open(self.args.config) as json_file:    
        cfg = json.load(json_file)
    # TODO: handle the case where the config file doesn't exist
    points = []
    for igrid in cfg['grids']:
      assert(len(igrid) == 2)
      points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1])))

    POIs = cfg['POIs']

    file_dict = { }
    for p in points:
      file_dict[p] = []

    for f in glob.glob('higgsCombine.%s.*.%s.*.Asymptotic.mH*.root' % (POIs[0], POIs[1])):
      # print f
      rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.Asymptotic\.mH.*\.root' % (POIs[0], POIs[1]))
      matches = rgx.search(f)
      p = (matches.group('p1'), matches.group('p2'))
      if p in file_dict:
        file_dict[p].append(f)

    for key,val in file_dict.iteritems():
      name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
      print '>> Point %s' % name
      if len(val) == 0:
        print 'Going to run limit for point %s' % (key,)
        point_args = '-n .%s --setPhysicsModelParameters %s=%s,%s=%s --freezeNuisances %s,%s' % (name, POIs[0], key[0], POIs[1], key[1], POIs[0], POIs[1])
        cmd = ' '.join(['combine -M Asymptotic', cfg['opts'], point_args] + self.passthru)
        self.job_queue.append(cmd)

    bail_out = len(self.job_queue) > 0
    self.flush_queue()

    if bail_out: 
        print ">> New jobs were created / run in this cycle, run the script again to collect the output"
        sys.exit(0)

    xvals = []
    yvals = []
    zvals = []
    for key,val in file_dict.iteritems():
      for filename in val:
        fin = ROOT.TFile(filename)
        if fin.IsZombie(): continue
        tree = fin.Get('limit')
        for evt in tree:
          if evt.quantileExpected == -1:
            print 'At point %s have observed CLs = %f' % (key, evt.limit)
            xvals.append(float(key[0]))
            yvals.append(float(key[1]))
            zvals.append(float(evt.limit))
    graph = ROOT.TGraph2D(len(zvals), array('d', xvals), array('d', yvals), array('d', zvals))
    h_bins = cfg['hist_binning']
    hist = ROOT.TH2F('h_observed', '', h_bins[0], h_bins[1], h_bins[2], h_bins[3], h_bins[4], h_bins[5])
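    # 'hist_binning' is assumed to be [nbinsx, xlo, xhi, nbinsy, ylo, yhi],
    # e.g. [20, 0, 500, 20, 0, 50] for a 20x20 histogram; each bin centre is
    # then filled by interpolating the TGraph2D built above.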
    for i in xrange(1, hist.GetNbinsX()+1):
      for j in xrange(1, hist.GetNbinsY()+1):
        hist.SetBinContent(i, j, graph.Interpolate(hist.GetXaxis().GetBinCenter(i), hist.GetYaxis().GetBinCenter(j)))
    fout = ROOT.TFile('asymptotic_grid.root', 'RECREATE')
    fout.WriteTObject(graph, 'observed')
    fout.WriteTObject(hist)
    fout.Close()
Example #13
    def run_method(self):
        # Put the method back in because we always take it out
        self.put_back_arg('method', '-M')

        # cmd_queue = []
        subbed_vars = {}

        # pre_cmd = ''

        if self.args.mass is not None:
            mass_vals = utils.split_vals(self.args.mass)
            subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
            self.passthru.extend(['-m', '%(MASS)s'])

        if self.args.singlePoint is not None:
            single_points = utils.split_vals(self.args.singlePoint)
            subbed_vars[('SINGLEPOINT',)] = [(pval,) for pval in single_points]
            self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
            self.args.name += '.POINT.%(SINGLEPOINT)s'

        if self.args.seed is not None:
            seed_vals = utils.split_vals(self.args.seed)
            subbed_vars[('SEED',)] = [(sval,) for sval in seed_vals]
            self.passthru.extend(['-s', '%(SEED)s'])

        for i, generate in enumerate(self.args.generate):
            split_char = ':' if '::' in generate else ';'
            gen_header, gen_content = generate.split(split_char*2)
            print gen_header
            print gen_content
            gen_headers = gen_header.split(split_char)
            gen_entries = gen_content.split(split_char)
            key = tuple()
            arglist = []
            for header in gen_headers:
                if header == 'n' or header == 'name':
                    self.args.name += '.%(GENNAME' + str(i) + ')s'
                    key += ('GENNAME' + str(i),)
                else:
                    self.passthru.extend(['%(' + header + ')s'])
                    key += (header,)
            for entry in gen_entries:
                if ',,' in entry:
                    split_entry = entry.split(',,')
                else:
                    split_entry = entry.split(',')
                final_arg = []
                for header, e in zip(gen_headers, split_entry):
                    argname = '-%s' % header if len(header) == 1 else '--%s' % header
                    if header == 'n' or header == 'name':
                        final_arg.append(e)
                    elif len(e) and e != '!':
                        final_arg.append('%s %s' % (argname, e))
                    else:
                        final_arg.append('')
                arglist.append(tuple(final_arg))
            subbed_vars[key] = arglist
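        # Sketch of the --generate syntax parsed above (filenames hypothetical):
        # 'd;n;;card1.txt,one;card2.txt,two' has headers ['d', 'n'] and two
        # entries, expanding to one job with '-d card1.txt' and name suffix
        # '.one' and another with '-d card2.txt' and '.two'. An entry value of
        # '!' (or an empty string) drops that option for the job.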


        if len(self.args.datacard) >= 1:
            # Two lists of tuples, one which does specify the mass, and one
            # which doesn't
            dc_mass = []
            dc_no_mass = []
            for dc in self.args.datacard:
                # Split workspace into path and filename
                path, file = os.path.split(dc)
                # If the wsp is in the current directory we should call it '.'
                if path == '':
                    path = '.'
                # If we're not using the --there option then leave the
                # workspace argument as the full path
                if not self.args.there:
                    file = dc
                # Figure out if the enclosing directory is a mass value
                dirs = path.split('/')
                if self.args.mass is None and len(dirs) >= 1 and isfloat(dirs[-1]):
                    print 'Assuming card %s uses mass value %s' % (dc, dirs[-1])
                    dc_mass.append((path, file, dirs[-1]))
                dc_no_mass.append((path, file))
            # If at least one mass value was inferred assume all of them are like this
            if len(dc_mass) > 0:
                subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass
                self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s'])
            else:
                subbed_vars[('DIR', 'DATACARD',)] = dc_no_mass
                self.passthru.extend(['-d', '%(DATACARD)s'])
        # elif len(self.args.datacard) == 1:
        #     self.passthru.extend(['-d', self.args.datacard[0]])

        current_ranges = self.args.setPhysicsModelParameterRanges
        put_back_ranges = current_ranges is not None

        if self.args.boundlist is not None:
            # We definitely don't need to put the parameter ranges back
            # into the args because they're going in via the boundlist
            # option instead
            put_back_ranges = False
            with open(self.args.boundlist) as json_file:
                bnd = json.load(json_file)
            bound_pars = list(bnd.keys())
            print 'Found bounds for parameters %s' % ','.join(bound_pars)
            # Fill a dictionary of the bound info of the form:
            #  { 'PAR1' : [(MASS, LOWER, UPPER), ...], ...}
            bound_vals = {}
            for par in bound_pars:
                bound_vals[par] = list()
                for mass, bounds in bnd[par].iteritems():
                    bound_vals[par].append((float(mass), bounds[0], bounds[1]))
                bound_vals[par].sort(key=lambda x: x[0])
            # find the subbed_vars entry containing the mass
            # We will extend it to also specify the ranges
            dict_key = None
            mass_idx = None
            for key in subbed_vars.keys():
                if 'MASS' in key:
                    dict_key = key
                    mass_idx = dict_key.index('MASS')
            new_key = dict_key + ('MODELBOUND',)
            new_list = []
            for entry in subbed_vars[dict_key]:
                command = []
                if current_ranges is not None:
                    command.append(current_ranges)
                mval = entry[mass_idx]
                for par in bound_pars:
                    # The (mass, None, None) is just a trick to make bisect_left do the comparison
                    # with the list of tuples in bound_vals[par]. The +1E-5 is to avoid float rounding
                    # issues
                    lower_bound = bisect.bisect_left(bound_vals[par], (float(mval)+1E-5, None, None))
                    # If lower_bound == 0 this means we are at or below the lowest mass point,
                    # in which case we should increase by one to take the bounds from this lowest
                    # point
                    if lower_bound == 0:
                        lower_bound += 1
                    command.append('%s=%g,%g' % (par, bound_vals[par][lower_bound-1][1], bound_vals[par][lower_bound-1][2]))
                new_list.append(entry + (str(':'.join(command)),))
            # now remove the current mass information from subbed_vars
            # and replace it with the updated one
            del subbed_vars[dict_key]
            subbed_vars[new_key] = new_list
            self.passthru.extend(['--setPhysicsModelParameterRanges',  '%(MODELBOUND)s'])

        # We might need to put the intercepted --setPhysicsModelParameterRanges arg back in
        if put_back_ranges:
            self.put_back_arg('setPhysicsModelParameterRanges', '--setPhysicsModelParameterRanges')

        if self.args.points is not None:
            self.passthru.extend(['--points', self.args.points])
        if (self.args.split_points is not None and
                self.args.split_points > 0 and
                self.args.points is not None):
            points = int(self.args.points)
            split = self.args.split_points
            start = 0
            ranges = []
            while (start + (split - 1)) < points:
            #    filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root"
            #    if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            #        # Send job, if the file it's supposed to create doesn't exist yet
            #        # or if the file is empty because the previous job didn't finish
                ranges.append((start, start + (split - 1)))
                start += split
            if start < points:
            #    filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root"
            #    if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
                ranges.append((start, points - 1))
            #if (ranges == []):
            #    print "No jobs were created; All files already exist"
            #    exit()
            subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
            self.passthru.extend(
                ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
            self.args.name += '.POINTS.%(P_START)s.%(P_END)s'

        # can only put the name option back now because we might have modified
        # it from what the user specified
        self.put_back_arg('name', '-n')
        proto = 'combine ' + (' '.join(self.passthru))
        if self.args.there:
            proto = 'pushd %(DIR)s; combine ' + (' '.join(self.passthru))+'; popd'

        for it in itertools.product(*subbed_vars.values()):
            keys = subbed_vars.keys()
            subs = {}
            for i, k in enumerate(keys):
                for tuple_i, tuple_ele in enumerate(k):
                    subs[tuple_ele] = it[i][tuple_i]
            self.job_queue.append(proto % subs)
        self.flush_queue()