def _init_fastnlo(self):
    """Create the fastNLO reader for this object and fill its PDF cache.

    Reads ``self._table_filename``, ``self._lhgrid_filename`` and
    ``self._member``; the new reader is stored in ``self._fnlo``.
    """
    # fastNLOReader instance; the third constructor argument is a literal 0.
    self._fnlo = reader = fastNLOLHAPDF(
        self._table_filename,
        self._lhgrid_filename,
        0,
    )
    # Select the member and fill the cache right away: per the original
    # note this is required to be able to read out the number of members.
    reader.SetLHAPDFMember(self._member)
    reader.FillPDFCache()
def run(self, plotData):
    """Turn every configured fastNLO table into a ROOT TH1D.

    The lists ``fastnlo_files``, ``pdf_sets`` and ``members`` of
    ``plotData.plotdict`` are walked in lockstep. For every triple the
    cross section is calculated and written into a histogram that is
    registered in ``plotdict['root_objects']`` under the nick
    ``<filename>_<pdfset>_<member>``; the nick is also appended to
    ``plotdict['nicks']``.
    """
    settings = plotData.plotdict
    jobs = zip(
        settings['fastnlo_files'],
        settings['pdf_sets'],
        settings['members'],
    )
    for filename, pdfset, member in jobs:
        fnlo = fastNLOLHAPDF(str(filename))
        fnlo.SetLHAPDFFilename(str(pdfset))
        fnlo.SetLHAPDFMember(member)
        fnlo.CalcCrossSection()

        # Histogram axis: sorted, de-duplicated union of all bin boundaries
        # of dimension 0.
        x_binning = sorted(set(
            edge
            for boundaries in fnlo.GetDim0BinBoundaries()
            for edge in boundaries
        ))
        title = str(member)
        root_histogram = ROOT.TH1D(
            title, str(member), len(x_binning) - 1, array('d', x_binning))

        # Central cross section; non-positive entries are set to zero
        # (the original author marked this clamp with a question mark).
        xs = np.array(fnlo.GetCrossSection())
        non_positive = xs <= 0.
        xs[non_positive] = 0.
        # ROOT bin numbers start at 1, the cross section array at 0.
        for root_bin in range(1, fnlo.GetNDim0Bins() + 1):
            root_histogram.SetBinContent(root_bin, xs[root_bin - 1])

        # Register nick and histogram in the plot dictionary.
        nick = "_".join((filename, pdfset, str(member)))
        settings.setdefault("nicks", []).append(nick)
        settings.setdefault("root_objects", {})[nick] = root_histogram
def run(self, plotData):
    """Create one ROOT object per configured fastNLO table.

    The plotdict lists ``fastnlo_files``, ``pdf_sets``, ``members``,
    ``fastnlo_nicks``, ``k_factors``, ``uncertainty_type`` and
    ``uncertainty_style`` are iterated in lockstep.

    * If the uncertainty type is ``None`` a TH1D is filled with the cross
      section (or the k-factors when the k-factor flag is truthy).
    * Otherwise a TGraphAsymmErrors is built. For k-factors the y errors
      are zero; for cross sections the values come from
      ``fnlo.Get<type>UncertaintyVec(fastnlo.<style>)`` and the returned
      up/down numbers are multiplied with the central value.

    The object is stored in ``plotdict['root_objects']`` under the given
    nick, or under ``<filename>_<pdfset>_<member>`` if the nick is ``None``.
    """
    import fastnlo

    settings = plotData.plotdict
    jobs = zip(
        settings['fastnlo_files'],
        settings['pdf_sets'],
        settings['members'],
        settings['fastnlo_nicks'],
        settings['k_factors'],
        settings['uncertainty_type'],
        settings['uncertainty_style'],
    )
    for filename, pdfset, member, nick, kfactor, unctype, uncstyle in jobs:
        fnlo = fastnlo.fastNLOLHAPDF(str(filename))
        fnlo.SetLHAPDFFilename(str(pdfset))
        fnlo.SetLHAPDFMember(member)
        fnlo.UseHoppetScaleVariations(True)
        fnlo.CalcCrossSection()

        # Sorted, de-duplicated union of all dimension-0 bin bounds.
        x_binning = sorted(set(
            edge
            for bounds in fnlo.GetDim0BinBounds()
            for edge in bounds
        ))

        if unctype is None:
            # Plain histogram of the central values.
            root_object = ROOT.TH1D(
                str(member), str(member),
                len(x_binning) - 1, array('d', x_binning))
            getter_name = "GetKFactors" if kfactor else "GetCrossSection"
            central = np.array(getattr(fnlo, getter_name)())
            # Non-positive entries are set to zero (marked "?" by the
            # original author).
            central[central <= 0.] = 0.
            for root_bin in range(1, fnlo.GetNDim0Bins() + 1):
                root_object.SetBinContent(root_bin, central[root_bin - 1])
        else:
            # Uncertainties requested: a TGraphAsymmErrors must be used.
            if kfactor:
                cross_sections = fnlo.GetKFactors()
                error_up = [0.] * len(fnlo.GetKFactors())
                error_down = [0.] * len(fnlo.GetKFactors())
            else:
                method_name = "Get{}UncertaintyVec".format(unctype)
                uncertainty_getter = getattr(fnlo, method_name)
                style = getattr(fastnlo, uncstyle)
                cross_sections, error_up, error_down = uncertainty_getter(style)

            root_object = ROOT.TGraphAsymmErrors(len(cross_sections))
            for point, value in enumerate(cross_sections):
                low_edge = x_binning[point]
                high_edge = x_binning[point + 1]
                x_center = 0.5 * (low_edge + high_edge)
                root_object.SetPoint(point, x_center, value)
                root_object.SetPointEYhigh(point, value * error_up[point])
                root_object.SetPointEYlow(point, value * abs(error_down[point]))
                root_object.SetPointEXlow(point, x_center - low_edge)
                root_object.SetPointEXhigh(point, high_edge - x_center)

        # Register nick and object in the plot dictionary.
        if nick is None:
            nick = "_".join((filename, pdfset, str(member)))
        settings.setdefault("nicks", []).append(nick)
        settings.setdefault("root_objects", {})[nick] = root_object
def run(self, plotData):
    """Create one ROOT object per configured fastNLO table.

    Iterates the plotdict lists ``fastnlo_files``, ``pdf_sets``,
    ``members``, ``fastnlo_nicks`` and ``k_factors`` in lockstep. Unlike
    those lists, ``uncertainty_style`` and ``uncertainty_type`` are read
    from the plotdict as single values that apply to every table.

    * If ``uncertainty_style`` is ``None`` a TH1D is filled with the cross
      section (or the k-factors when the k-factor flag is truthy).
    * Otherwise a TGraphAsymmErrors is built. For k-factors the y errors
      are zero; for cross sections the values come from
      ``fnlo.Get<type>UncertaintyVec(fastnlo.<style>)`` and the returned
      up/down numbers are multiplied with the central value.

    The object is stored in ``plotdict['root_objects']`` under the given
    nick, or under ``<filename>_<pdfset>_<member>`` if the nick is ``None``.
    """
    settings = plotData.plotdict
    jobs = zip(
        settings['fastnlo_files'],
        settings['pdf_sets'],
        settings['members'],
        settings['fastnlo_nicks'],
        settings['k_factors'],
    )
    for filename, pdfset, member, nick, kfactor in jobs:
        fnlo = fastnlo.fastNLOLHAPDF(str(filename))
        fnlo.SetLHAPDFFilename(str(pdfset))
        fnlo.SetLHAPDFMember(member)
        fnlo.CalcCrossSection()

        # Sorted, de-duplicated union of all dimension-0 bin bounds.
        x_binning = sorted(set(
            edge
            for bounds in fnlo.GetDim0BinBounds()
            for edge in bounds
        ))

        if settings['uncertainty_style'] is None:
            # Plain histogram of the central values.
            root_object = ROOT.TH1D(
                str(member), str(member),
                len(x_binning) - 1, array('d', x_binning))
            getter_name = "GetKFactors" if kfactor else "GetCrossSection"
            central = np.array(getattr(fnlo, getter_name)())
            # Non-positive entries are set to zero (marked "?" by the
            # original author).
            central[central <= 0.] = 0.
            for root_bin in range(1, fnlo.GetNDim0Bins() + 1):
                root_object.SetBinContent(root_bin, central[root_bin - 1])
        else:
            # Uncertainties requested: a TGraphAsymmErrors must be used.
            if kfactor:
                cross_sections = fnlo.GetKFactors()
                error_up = [0.] * len(fnlo.GetKFactors())
                error_down = [0.] * len(fnlo.GetKFactors())
            else:
                method_name = "Get{}UncertaintyVec".format(
                    settings['uncertainty_type'])
                uncertainty_getter = getattr(fnlo, method_name)
                style = getattr(fastnlo, settings['uncertainty_style'])
                cross_sections, error_up, error_down = uncertainty_getter(style)

            root_object = ROOT.TGraphAsymmErrors(len(cross_sections))
            for point, value in enumerate(cross_sections):
                low_edge = x_binning[point]
                high_edge = x_binning[point + 1]
                x_center = 0.5 * (low_edge + high_edge)
                root_object.SetPoint(point, x_center, value)
                root_object.SetPointEYhigh(point, value * error_up[point])
                root_object.SetPointEYlow(point, value * abs(error_down[point]))
                root_object.SetPointEXlow(point, x_center - low_edge)
                root_object.SetPointEXhigh(point, high_edge - x_center)

        # Register nick and object in the plot dictionary.
        if nick is None:
            nick = "_".join((filename, pdfset, str(member)))
        settings.setdefault("nicks", []).append(nick)
        settings.setdefault("root_objects", {})[nick] = root_object
def __init__(self, table_filename, lhgrid_filename, member=0,
             scale_factor=(1.0, 1.0), errortype='auto', pdf_clscale=None):
    """Set up a fastNLO reader for one table and one PDF set.

    Args:
        table_filename: Path of the fastNLO table, passed to
            ``fastNLOLHAPDF``.
        lhgrid_filename: PDF set / LHgrid name, passed to ``fastNLOLHAPDF``.
        member: PDF member selected on the reader right after creation.
        scale_factor: Stored unchanged in ``self._scale_factor``
            (presumably the (mu_r, mu_f) factors -- TODO confirm).
        errortype: ``'auto'`` delegates to ``self._identify_errortype()``;
            any other value is stored in ``self._errortype`` as given.
        pdf_clscale: Stored unchanged in ``self._pdf_clscale``.
    """
    self._table_filename = table_filename
    self._lhgrid_filename = lhgrid_filename
    self._member = member
    self._scale_factor = scale_factor

    # Compare by value: ``is`` tests object identity, which only works for
    # strings by accident of interning and is a SyntaxWarning on
    # Python >= 3.8.
    if errortype == 'auto':
        # NOTE(review): presumably sets self._errortype -- the helper is
        # not visible here; confirm.
        self._identify_errortype()
    else:
        self._errortype = errortype

    self._pdf_clscale = pdf_clscale

    # fastNLOReader instance
    # SetGlobalVerbosity(1)
    self._fnlo = fastNLOLHAPDF(self._table_filename, self._lhgrid_filename)
    # Select the member and fill the cache immediately: per the original
    # note this is required to be able to read out the number of members.
    self._fnlo.SetLHAPDFMember(self._member)
    self._fnlo.FillPDFCache()

    # Information about PDF members and bins
    self._npdfmembers = self._fnlo.GetNPDFMembers()
    self._nobsbins = self._fnlo.GetNObsBins()
    self._ndiffbins = self._fnlo.GetNDiffBin()

    # Differential bin edges, transposed relative to what the reader returns
    self._bins_down = numpy.array(self._fnlo.GetLowBinEdge()).transpose()
    self._bins_up = numpy.array(self._fnlo.GetUpBinEdge()).transpose()

    # Member cross sections are not computed here. Worst-case estimate from
    # the original author: 1000 members * 1000 obs bins * 10 scales
    # * 8 bytes = 80 MB, which was judged too much; hence one array per
    # scale.
    self._member_crosssections = None
def get_fnlo(table, pdfset):
    """Get the cross section values from the table for a certain pdfset.

    Args:
        table: fastNLO table, passed to ``fastNLOLHAPDF``.
        pdfset: PDF set name, passed to ``SetLHAPDFFilename``.

    Returns:
        dict with the keys

        * ``'xsnlo'``: array of shape (observable bins, PDF members);
          column ``i`` holds the cross section of PDF member ``i``,
        * ``'scale'``: array built from ``GetQScales()``,
        * ``'y_low'`` / ``'y_high'``: first / second entry of every bin
          returned by ``GetObsBinsBounds(0)``.
    """
    fnlo = fastNLOLHAPDF(table)
    fnlo.SetLHAPDFFilename(pdfset)
    fnlo.SetLHAPDFMember(0)
    fnlo.CalcCrossSection()
    npdfmember = fnlo.GetNPDFMembers()

    xs_nlo = {}
    xs_nlo['xsnlo'] = np.zeros((npdfmember, fnlo.GetNObsBin()))
    xs_nlo['scale'] = np.array(fnlo.GetQScales())
    # Query the bounds once and split them into lower and upper edges.
    bounds = fnlo.GetObsBinsBounds(0)
    xs_nlo['y_low'] = [_bin[0] for _bin in bounds]
    xs_nlo['y_high'] = [_bin[1] for _bin in bounds]

    for i in range(npdfmember):
        fnlo.SetLHAPDFMember(i)
        fnlo.CalcCrossSection()
        # Row i belongs to member i. The former ``[i - 1]`` index wrote
        # member 0 into the last row and shifted all other members by one.
        xs_nlo['xsnlo'][i] = fnlo.GetCrossSection()

    # Callers get (bins, members) instead of (members, bins).
    xs_nlo['xsnlo'] = xs_nlo['xsnlo'].transpose()
    return xs_nlo
def get_fnlo(table, pdfset):
    """Evaluate a fastNLO table for PDF members 1..N-1 of ``pdfset``.

    Member 0 is only used for the initial ``CalcCrossSection`` call; its
    cross section is not part of the result.

    Returns:
        dict with the keys

        * ``'xsnlo'``: array of shape (observable bins, N - 1); column
          ``k`` holds the cross section of PDF member ``k + 1``,
        * ``'scale'``: array built from ``GetQScales(1)``,
        * ``'y_low'`` / ``'y_high'``: arrays from ``GetLoBin(0)`` /
          ``GetUpBin(0)``,
        * ``'pt_low'`` / ``'pt_high'``: the very same array objects as
          ``'y_low'`` / ``'y_high'``.
    """
    fnlo = fastNLOLHAPDF(table)
    fnlo.SetLHAPDFFilename(pdfset)
    fnlo.SetLHAPDFMember(0)
    fnlo.CalcCrossSection()

    npdfmember = fnlo.GetNPDFMembers()
    member_xs = np.zeros((npdfmember - 1, fnlo.GetNObsBin()))
    scales = np.array(fnlo.GetQScales(1))

    # Bin edges of dimension 0. The pt_* entries below deliberately alias
    # these arrays (earlier variants based on GetObsBin / GetLowBinEdge /
    # GetUpBinEdge were already commented out in the original).
    lower_edges = np.array(fnlo.GetLoBin(0))
    upper_edges = np.array(fnlo.GetUpBin(0))

    # Row k of member_xs receives PDF member k + 1.
    for row, member in enumerate(range(1, npdfmember)):
        fnlo.SetLHAPDFMember(member)
        fnlo.CalcCrossSection()
        member_xs[row] = fnlo.GetCrossSection()

    return {
        'xsnlo': member_xs.transpose(),
        'scale': scales,
        'y_low': lower_edges,
        'y_high': upper_edges,
        'pt_low': lower_edges,
        'pt_high': upper_edges,
    }
def main(
        member=0,
        input_filename='fnlo_yZ.tab',
        output_filename='zpt.root',
        pdf_set=(
            #'../NNPDF21_100.LHgrid'
            'CT10nlo.LHgrid'
        ),
):
    """Write the central cross section of a fastNLO table to a ROOT file.

    Args:
        member: PDF member to evaluate; also used as name and title of the
            histogram.
        input_filename: fastNLO table to read.
        output_filename: ROOT file to (re)create.
        pdf_set: PDF set name passed to ``SetLHAPDFFilename``.

    The histogram is binned in the sorted, de-duplicated dimension-0 bin
    bounds of the table. Non-positive cross sections are set to zero. The
    PDF- and scale-uncertainty parts are disabled (``if False``), exactly
    as in the original.
    """
    # init fnlo
    fnlo = fastNLOLHAPDF(input_filename)
    fnlo.SetLHAPDFFilename(pdf_set)
    fnlo.SetLHAPDFMember(member)
    fnlo.CalcCrossSection()

    out = ROOT.TFile(output_filename, "RECREATE")
    # try/finally: close the file even if one of the steps below raises.
    try:
        # Single-argument print calls produce the same text as the former
        # print statements and parse on both Python 2 and Python 3.
        print("PDF member: {}  output_filename: {}".format(
            member, output_filename))

        # make histo
        x_binning = sorted(set(
            edge
            for bounds in fnlo.GetDim0BinBounds()
            for edge in bounds
        ))
        histo = ROOT.TH1D(
            str(member), str(member),
            len(x_binning) - 1, array('d', x_binning))

        # fill values for central xsec
        xs = np.array(fnlo.GetCrossSection())
        xs[xs <= 0.] = 0.  # ? (clamp questioned by the original author)
        for i in range(0, fnlo.GetNDim0Bins()):
            histo.SetBinContent(i + 1, xs[i])
        histo.Write()

        # errors for PDF variations (disabled)
        if False:
            print("Calulating errors for {} PDF variations".format(
                fnlo.GetNPDFMembers() - 1))
            errors = [0.] * len(x_binning)
            for i in range(1, fnlo.GetNPDFMembers()):
                fnlo.SetLHAPDFMember(i)
                fnlo.CalcCrossSection()
                xsec = fnlo.GetCrossSection()
                for j in range(len(xsec)):
                    # sum up relative errors in quadrature
                    errors[j] += ((xsec[j] - xs[j]) / xs[j]) ** 2
            for i, quad_error in enumerate(errors):
                errors[i] = math.sqrt(quad_error)  # root of squared errors

            # put PDF errors in graph
            pdf_uncertainty = ROOT.TGraph()
            pdf_uncertainty.SetName("pdf_uncertainty")
            for i, error in enumerate(errors):
                pdf_uncertainty.SetPoint(i, x_binning[i], error)
            pdf_uncertainty.Write()

        # scale uncertainties (disabled: "dont use for now")
        if False:
            variations = [0.5, 1, 2]
            errors = [0.] * len(x_binning)
            for mur in variations:
                for muf in variations:
                    fnlo.SetScaleFactorsMuRMuF(mur, muf)
                    fnlo.CalcCrossSection()
                    xsec = fnlo.GetCrossSection()
                    # enumerate() was missing here: unpacking the plain
                    # cross section values into (i, xsec_bin) would fail
                    # as soon as this block is enabled.
                    for i, xsec_bin in enumerate(xsec):
                        errors[i] = max(
                            errors[i], abs(xsec_bin - xs[i]) / xs[i])

            # put scale errors in graph
            scale_uncertainty = ROOT.TGraph()
            scale_uncertainty.SetName("scale_uncertainty")
            for i, error in enumerate(errors):
                scale_uncertainty.SetPoint(i, x_binning[i], error)
            scale_uncertainty.Write()

        # finish
        print("histogram written to file {}".format(output_filename))
    finally:
        out.Close()
def _min_order_of_magnitude(values):
    """Return the smallest truncated log10 exponent of the usable entries.

    Zero, infinite and NaN entries are skipped because ``math.log10`` /
    ``int`` cannot handle them; negative entries contribute via their
    absolute value. Returns 0 if no entry is usable.
    """
    magnitudes = [
        int(math.log10(abs(x)))
        for x in values
        if x != 0 and not math.isnan(x) and not math.isinf(x)
    ]
    return min(magnitudes) if magnitudes else 0


def main():
    """Statistical analysis of a folder of fastNLO tables.

    Steps:

    1. Parse the command line and configure logging.
    2. Collect all ``*.tab`` files in the input folder whose basename
       matches ``--regex``; exit with status 1 if there are none.
    3. Read the cross section of every table in a process pool
       (``gettab``).
    4. Log mean, trimmed mean, standard deviations, errors on the mean,
       the median and a few ratios per observable bin.
    5. Call ``plot_distribution`` and report every table that has at least
       one bin more than ``--stds`` standard deviations away from the
       median; with ``--filter`` those tables are moved to
       ``<work-dir>/invalid_nlo_tables``.

    Raises:
        ValueError: if ``--log-level`` is not a level name known to the
            ``logging`` module.
    """
    parser = argparse.ArgumentParser(
        description='Statistical analysis of fastNLO tables')
    parser.add_argument('-i', '--input-folder', required=True,
                        help='Folder containing the fastNLO files.')
    parser.add_argument('--work-dir', help='Workdir.')
    parser.add_argument('--pdfset', default='CT10nlo',
                        help='PDF set to evaluate fastNLO tables.')
    parser.add_argument('-r', '--regex', default='^.*nlo.*$',
                        help='Regex matching tables in input folder.')
    parser.add_argument('-m', '--max-processes', type=int, default=8,
                        help='Max number of parallel processes')
    parser.add_argument('--filter', action='store_true', default=False,
                        help='Filter invalid tables.')
    # argparse collapses whitespace runs in help texts, so implicit string
    # concatenation renders the same as the former backslash continuation.
    parser.add_argument('-s', '--stds', type=float, default=100.,
                        help='number of standard deviations a '
                             'table is allowed to deviate from the median '
                             'to not be considered critical')
    parser.add_argument("--log-level", default="info", help="Log level.")

    # Parse arguments.
    args = vars(parser.parse_args())
    if args['work_dir'] is None:
        args['work_dir'] = args['input_folder']

    # Setup logger and log level
    log_level = getattr(logging, args['log_level'].upper(), None)
    if not isinstance(log_level, int):
        # The message used to reference an undefined name ``loglevel`` and
        # therefore raised NameError instead of this ValueError.
        raise ValueError('Invalid log level: %s' % args['log_level'])
    logging.basicConfig(format='%(message)s', level=log_level)

    log.info('Globbing all .tab files in input directory.')
    log.debug('Regex for NLO tables is \'{0}\'.'.format(args['regex']))

    # Find all fastNLO tables in input folder
    fnlo_tables = glob.glob(os.path.join(args['input_folder'], '*.tab'))
    tables_files = [
        table for table in fnlo_tables
        if re.match(args['regex'], os.path.basename(table))
    ]
    log.info('Found {0} tables in input directory.'.format(len(tables_files)))
    if not tables_files:
        log.error("no tables!")
        sys.exit(1)

    # Read one table to get number of bins
    _fnlo = fastnlo.fastNLOLHAPDF(tables_files[0], args['pdfset'])
    n_bins = _fnlo.GetNObsBin()
    log.info('Tables contain {0} observable bins.'.format(n_bins))

    # get cross section values
    n_procs = min([args['max_processes'], len(tables_files)])
    log.info("Get cross section from fastNLO tables using {} processes".format(
        n_procs))
    pool = multiprocessing.Pool(processes=n_procs)
    try:
        results = pool.map_async(
            gettab, [(tab, args['pdfset']) for tab in tables_files])
        # The large timeout is needed for KeyboardInterrupt to work:
        # http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
        xs_nlo = np.array(results.get(9999999))
    finally:
        # All results are in (or the call failed): release the workers.
        pool.terminate()
        pool.join()

    # calculate statistical estimators (one value per observable bin)
    mean = np.mean(xs_nlo, axis=0)
    std = np.std(xs_nlo, axis=0)
    mean_error = std / math.sqrt(float(len(tables_files)))
    mean_error_rel = mean_error / mean
    mean_error_rel_percent = mean_error_rel * 100.
    median = np.median(xs_nlo, axis=0)
    tmean = trimmed_mean(xs_nlo, axis=0, percentile=0.1)
    tstd = trimmed_std(xs_nlo, axis=0, percentile=0.1)

    # print results; explicit label/value pairs replace the former eval()
    # on the label strings.
    report = [
        ('mean', mean),
        ('tmean', tmean),
        ('std', std),
        ('mean_error', mean_error),
        ('mean_error_rel', mean_error_rel),
        ('mean_error_rel_percent', mean_error_rel_percent),
        ('tstd', tstd),
        ('median', median),
        ('std/mean', std / mean),
        ('tstd/mean', tstd / mean),
        ('mean/median', mean / median),
    ]
    for label, values in report:
        log.info(label)
        # Round so that the smallest entry keeps about three significant
        # digits.
        digits = max([0, 2 - _min_order_of_magnitude(values)])
        log.info(np.array([round(value, digits) for value in values]))

    # plot
    plot_distribution(xs_nlo, plot_dir='nlo_plots', **args)

    # Find all tables where any bin is more than x std away from the
    # median. The absolute deviation is used so that tables far *below*
    # the median are caught as well.
    is_outlier = np.any(
        np.abs(xs_nlo - median) > args['stds'] * std, axis=1)
    invalid_nlo_tables = np.array(tables_files)[is_outlier]
    if invalid_nlo_tables.size != 0:
        log.warning(
            'There are tables with potential problems (any bin with xsec {} sigma away from median):'
            .format(args['stds']))
        log.info('\n'.join(invalid_nlo_tables))
        if args['filter']:
            directory = os.path.join(args['work_dir'], 'invalid_nlo_tables')
            log.info('The tables will be moved into the directory {0}.'.format(
                directory))
            if not os.path.exists(directory):
                os.makedirs(directory)
            for filename in invalid_nlo_tables:
                shutil.move(filename, directory)
def gettab(arg):
    """Return the cross section of one fastNLO table as a numpy array.

    ``arg`` is unpacked into the positional arguments of
    ``fastnlo.fastNLOLHAPDF``.
    """
    reader = fastnlo.fastNLOLHAPDF(*arg)
    cross_section = reader.GetCrossSection()
    return np.array(cross_section)