def _sync(path):
    # Switch the module-level pickle cache to <path>: flush the currently
    # loaded pack to disk, then load 'data.pkl' at the new location (or
    # start with an empty dict if none exists there yet).
    global _current_path, _current_pack, _changed
    if use_analysis_cwd:
        path = os.path.join(analysis.cwd, path)
    if path == _current_path:
        return  # already synced to this path; nothing to do

    # write out and load if possible
    _write_out()
    _current_path = path
    data_path = os.path.join(_current_path, 'data.pkl')
    if not os.path.exists(data_path):
        _current_pack = {}  # fresh location: start with an empty pack
    else:
        with open(data_path) as f:
            try:
                _current_pack = cPickle.load(f)
            except Exception as e:
                # annotate the exception with the offending file before
                # re-raising, so the failure points at the bad pickle
                msg = 'ERROR with file: %s' % data_path
                e.message += msg
                monitor.message('pklio', msg)
                raise
    assert isinstance(_current_pack, dict), 'path: %s' % data_path
def gen_squash_sys_acc(wrps, accumulator, calc_sys_integral=False):
    """ Adds one-sided sys' quadratically and builds envelope from up and down. """
    wrps = list(wrps)
    # bail out early when there is nothing to squash: either no wrapper
    # carries sys_info at all, or no nominal (sys_info == '') is present
    if not any(w.sys_info for w in wrps) or not any(w.sys_info == '' for w in wrps):
        return accumulator(wrps)

    def sys_info_key(w):
        return w.sys_info

    sys_tup = []
    if calc_sys_integral:
        # work on copies grouped per sample, squash each group, and record
        # (legend, integral) pairs to attach to the accumulated result below
        nwrps = gen_copy(wrps)
        nwrps = sorted(nwrps, key=lambda w: w.sample)
        nwrps = group(nwrps, lambda w: w.sample)
        try:
            nwrps = (gen_squash_sys(ngrp) for ngrp in nwrps)
            sys_tup = list((nw.legend, (op.get_sys_int(nw))) for nw in nwrps)
        except op.OperationError as e:
            # best-effort: integrals are optional extras, keep going
            monitor.message('generators.gen_squash_sys_acc',
                            'WARNING catching error: \n' + str(e))

    # accumulate (e.g. stack) every sys-type by itself
    wrps = sorted(wrps, key=sys_info_key)
    wrps = group(wrps, sys_info_key)
    wrps = (accumulator(ws) for ws in wrps)
    wrp_acc = gen_squash_sys(wrps)
    if sys_tup:
        # attach the per-sample sys integrals as '<legend>__sys' attributes
        for s, i in sys_tup:
            setattr(wrp_acc, s + '__sys', i)
    return wrp_acc
def _sync(path):
    """Point the module cache at ``path``, flushing and (re)loading data.pkl."""
    global _current_path, _current_pack, _changed
    if use_analysis_cwd:
        path = os.path.join(analysis.cwd, path)
    if path == _current_path:
        return

    # flush the old pack, then load (or initialize) the new one
    _write_out()
    _current_path = path
    data_path = os.path.join(_current_path, 'data.pkl')
    if os.path.exists(data_path):
        with open(data_path) as pkl_file:
            try:
                _current_pack = cPickle.load(pkl_file)
            except Exception as exc:
                msg = 'ERROR with file: %s' % data_path
                exc.message += msg
                monitor.message('pklio', msg)
                raise
    else:
        _current_pack = {}
    assert isinstance(_current_pack, dict), 'path: %s' % data_path
def read(filename):
    """Reads wrapper from disk, including root objects."""
    filename = prepare_basename(filename) + '.info'
    with open(filename) as info_file:
        try:
            info = _read_wrapper_info(info_file)
        except ValueError as err:
            monitor.message(
                'diskio.read',
                'ERROR can not read info: ' + filename
            )
            raise err

    # pull in referenced root objects, if any
    if 'root_filename' in info:
        _read_wrapper_objs(info, dirname(filename))

    klass = getattr(wrappers, info.get('klass'))
    if klass == wrappers.WrapperWrapper:
        # recursively read the contained wrappers
        base_dir = dirname(filename)
        info['wrps'] = _read_wrapperwrapper(
            join(base_dir, wrp_name) for wrp_name in info['wrpwrp_names']
        )

    wrp = klass(**info)
    _clean_wrapper(wrp)
    return wrp
def record_in_save_log(filename):
    """Track saved files; warn when one is written twice in a session."""
    if filename not in _save_log:
        _save_log.add(filename)
    else:
        monitor.message(
            'diskio',
            'WARNING Overwriting file from this session: %s' % filename)
def gen_squash_sys(wrps):
    """ Adds one-sided sys' quadratically and builds envelope from up and down. """
    def sys_info_key(w):
        # strip the up/down token so both variations of one sys source
        # share a key; nominal (no token) maps to 0 which sorts before
        # any string key in Python 2, putting nominal first
        if w.sys_info.endswith(settings.sys_var_token_up):
            return w.sys_info[:-len(settings.sys_var_token_up)]
        if w.sys_info.endswith(settings.sys_var_token_down):
            return w.sys_info[:-len(settings.sys_var_token_down)]
        return 0

    # sort for plus and minus and get lists
    wrps = sorted(wrps, key=sys_info_key)
    wrps = group(wrps, sys_info_key)
    wrps = list(list(ws) for ws in wrps)
    # [[nom], [A__plus, A__minus], [B__plus, B__minus], ...]
    nominal, nominal_list = wrps[0][0], wrps[0]
    try:
        # build an envelope per sys source, then add all sources in quadrature
        uncertainties = list(op.squash_sys_env(ws) for ws in wrps[1:])  # [A, B, ...]
        sys_uncert = op.squash_sys_sq(nominal_list + uncertainties)
    except op.OperationError as e:
        # on failure fall back to the plain nominal without sys errors
        monitor.message('generators.gen_squash_sys',
                        'WARNING catching error: \n' + str(e))
        return nominal

    # put sys on nominal wrp (if nominal is a stack, the stack must be kept)
    nominal.histo_sys_err = sys_uncert.histo_sys_err
    return nominal
def _check_readability(wrp):
    # Round-trip check: the wrapper's pretty representation must parse as a
    # python literal (that is how it will be read back); warn otherwise.
    try:
        literal_eval(wrp.pretty_writeable_lines().replace('\n', ''))
    except (ValueError, SyntaxError):
        monitor.message(
            'diskio.write',
            'WARNING Wrapper will not be readable:\n%s' % str(wrp)
        )
def record_in_save_log(filename):
    # Remember every file written during this session and warn when the
    # same file is written a second time (the first copy is overwritten).
    if filename in _save_log:
        monitor.message(
            'diskio',
            'WARNING Overwriting file from this session: %s' % filename
        )
    else:
        _save_log.add(filename)
def process_settings_kws(kws):
    """Copy keyword arguments onto the settings module (known names only)."""
    # replace setting, if its name already exists.
    for name, value in kws.iteritems():
        if not hasattr(settings, name):
            monitor.message('main._process_settings_kws',
                            'WARNING No such setting: %s' % name)
        else:
            setattr(settings, name, value)
def mk_rootfile_plotter(name="RootFilePlots",
                        pattern=None,
                        input_result_path=None,
                        flat=False,
                        plotter_factory=None,
                        combine_files=False,
                        filter_keyfunc=lambda w: True,
                        auto_legend=True,
                        legendnames=None,
                        **kws):
    """
    Make a plotter chain that plots all content of all rootfiles in cwd.

    Additional keywords are forwarded to the plotter instanciation.
    For running the plotter(s), use a Runner.

    :param name: str, name of the folder in which the output is stored
    :param pattern: str, search pattern for rootfiles, default: ``*.root``
    :param flat: bool, flatten the rootfile structure default: ``False``
    :param plotter_factory: factory function for RootFilePlotter
                            default: ``None``
    :param combine_files: bool, plot same histograms across rootfiles
                          into the same canvas. Does not work together with
                          ``flat`` option, default: ``False``
    """
    # wrap the given factory (or the default Plotter) so that the extra
    # keyword arguments are forwarded to every instanciation
    def plotter_factory_kws(**kws_fctry):
        kws_fctry.update(kws)
        if plotter_factory:
            return plotter_factory(**kws_fctry)
        else:
            return Plotter(**kws_fctry)

    if kws:
        new_plotter_factory = plotter_factory_kws
    else:
        new_plotter_factory = plotter_factory

    if combine_files:
        # one plotter handling all files matching the pattern together
        tc = RootFilePlotter(
            pattern, input_result_path, new_plotter_factory,
            flat, name, filter_keyfunc, auto_legend, legendnames)
    else:
        # one plotter per rootfile; folder name is the basename without the
        # trailing '.root' (f[:-5] strips exactly 5 characters)
        # NOTE(review): pattern default is None here although the docstring
        # says '*.root' — presumably callers always pass one; verify.
        plotters = list(
            RootFilePlotter(
                f, input_result_path, new_plotter_factory, flat,
                f[:-5].split('/')[-1],
                filter_keyfunc, auto_legend, legendnames)
            for f in glob.iglob(pattern))
        if not plotters:
            monitor.message(
                'plotter.mk_rootfile_plotter',
                'WARNING no plotters generated for pattern: %s' % pattern)
        tc = toolinterface.ToolChainParallel(name, plotters)
    return tc
def process_settings_kws(kws):
    """Apply keyword arguments to the settings module, warning on unknowns."""
    # replace setting, if its name already exists.
    for k, v in kws.iteritems():
        if hasattr(settings, k):
            setattr(settings, k, v)
        else:
            monitor.message(
                'main._process_settings_kws',
                'WARNING No such setting: %s' % k
            )
def __call__(self, w):
    """Classify wrapper ``w``: True for (pseudo-)data, False otherwise.

    Remembers whether real data and/or pseudo-data has been seen and
    warns when both kinds appear in the same stream.
    """
    if ((w.is_data and self.using_pseudo_data)
            or (w.is_pseudo_data and self.using_real_data)):
        # BUGFIX: corrected 'psuedo-data' typo in the warning message
        monitor.message(
            'generators.split_data_bkg_sig',
            'WARNING I have data and pseudo-data in the same stream!')
    if w.is_data:
        self.using_real_data = True
        return True
    if w.is_pseudo_data:
        self.using_pseudo_data = True
        return True
    return False
def attribute_printer(iterable, attr):
    """
    Print an attribute of passing objects.

    :param iterable: An iterable of wrappers
    :param attr: str, name of the attribute to be printed
    :yields: same as input
    """
    for wrp in iterable:
        value = getattr(wrp, attr, '<not defined>')
        monitor.message('generators.attribute_printer',
                        'INFO: %s: %s' % (attr, value))
        yield wrp
def filter_active_samples(wrps):
    """
    Check if wrp.sample is in list of active samples (analysis.active_samples).

    :param wrps: Wrapper iterable
    :returns: generator object
    """
    nothing_active = not analysis.active_samples
    if nothing_active:
        monitor.message('generators.filter_active_samples',
                        'WARNING No active samples defined. Will yield all.')

    def is_active(w):
        # everything passes when no active samples are configured
        return nothing_active or (
            hasattr(w, 'sample') and w.sample in analysis.active_samples)

    return itertools.ifilter(is_active, wrps)
def debug_printer(iterable, print_obj=True):
    """
    Print objects and their type on flying by. Object printing can be disabled.

    :param iterable: An iterable with objects
    :param print_obj: bool, print whole object
    :yields: same as input
    """
    for item in iterable:
        monitor.message('generators.debug_printer',
                        'INFO: obj type: %s' % type(item))
        if print_obj:
            monitor.message('generators.debug_printer',
                            'obj: %s' % item)
        yield item
def get_open_root_file(filename):
    """Return a cached open TFile for ``filename``, opening it if needed.

    A module-level cache keeps files open; when it grows beyond
    ``settings.max_open_root_files`` all cached files are closed first.

    :raises RuntimeError: if root cannot open the file.
    """
    if filename in _open_root_files:
        file_handle = _open_root_files[filename]
    else:
        if len(_open_root_files) > settings.max_open_root_files:
            # BUGFIX: corrected 'to many' typo in the warning message
            monitor.message(
                'diskio',
                'WARNING too many open root files. Closing all. '
                'Please check for lost histograms. '
                '(Use hist.SetDirectory(0) to keep them)')
            close_open_root_files()
        file_handle = TFile.Open(filename, 'READ')
        if (not file_handle) or file_handle.IsZombie():
            raise RuntimeError('Cannot open file with root: "%s"' % filename)
        _open_root_files[filename] = file_handle
    if _in_a_block:
        # track files opened inside an io-block so they can be closed together
        _block_of_open_files.append(filename)
    return file_handle
def get_open_root_file(filename):
    """Return a cached open TFile for ``filename``, opening it if needed.

    Closes all cached files when more than ``settings.max_open_root_files``
    are open at once.

    :raises RuntimeError: if root cannot open the file.
    """
    if filename in _open_root_files:
        file_handle = _open_root_files[filename]
    else:
        if len(_open_root_files) > settings.max_open_root_files:
            # BUGFIX: corrected 'to many' typo in the warning message
            monitor.message(
                'diskio',
                'WARNING too many open root files. Closing all. '
                'Please check for lost histograms. '
                '(Use hist.SetDirectory(0) to keep them)'
            )
            close_open_root_files()
        file_handle = TFile.Open(filename, 'READ')
        if (not file_handle) or file_handle.IsZombie():
            raise RuntimeError('Cannot open file with root: "%s"' % filename)
        _open_root_files[filename] = file_handle
    if _in_a_block:
        # track files opened inside an io-block so they can be closed together
        _block_of_open_files.append(filename)
    return file_handle
def handle(self, signal_int, frame):
    """SIGINT handler: first hit warns and aborts children, second kills.

    :param signal_int: signal number delivered by the signal module
    :param frame: current stack frame (unused)
    """
    # BUGFIX: compare signal numbers with '==', not 'is' — identity of
    # equal ints is a CPython implementation detail, not a guarantee
    if signal_int == signal.SIGINT:
        if not ipython_mode:
            if self.hits:
                # repeated Ctrl-C: kill the whole process group hard
                if multiproc._cpu_semaphore:
                    try:
                        os.killpg(os.getpid(), signal.SIGTERM)
                    except OSError:
                        time.sleep(1)
                    exit(-1)
            else:
                # BUGFIX: corrected 'Crtl-C' typo in the warning message
                monitor.message(
                    'main.SigintHandler.handle',
                    'WARNING: SIGINT caught. '
                    'Aborting processes if any. '
                    'Ctrl-C again to kill immediately!'
                )
                sys.__stdout__.flush()
            self.hits += 1
            # NOTE(review): 'recieved_sigint' is misspelled, but the name is
            # read elsewhere in the project and must stay as-is
            settings.recieved_sigint = True
def handle(self, signal_int, frame):
    """SIGINT handler: first hit warns and aborts children, second kills.

    :param signal_int: signal number delivered by the signal module
    :param frame: current stack frame (unused)
    """
    # BUGFIX: compare signal numbers with '==', not 'is' — identity of
    # equal ints is a CPython implementation detail, not a guarantee
    if signal_int == signal.SIGINT:
        if not ipython_mode:
            if self.hits:
                # repeated Ctrl-C: kill the whole process group hard
                if multiproc.cpu_semaphore:
                    try:
                        os.killpg(os.getpid(), signal.SIGTERM)
                    except OSError:
                        time.sleep(1)
                    exit(-1)
            else:
                # BUGFIX: corrected 'Crtl-C' typo in the warning message
                monitor.message(
                    'main.SigintHandler.handle',
                    'WARNING: SIGINT caught. '
                    'Aborting processes if any. '
                    'Ctrl-C again to kill immediately!'
                )
                sys.__stdout__.flush()
            self.hits += 1
            # NOTE(review): 'recieved_sigint' is misspelled, but the name is
            # read elsewhere in the project and must stay as-is
            settings.recieved_sigint = True
def sort(wrps, key_list=None):
    """
    Sort stream after items in key_list. Loads full stream into memory.

    :param wrps: Wrapper iterable
    :param key_list: (List of) token(s) after which the stream is sorted.
                     First item has highest importance. If ``None``, then
                     ``settings.wrp_sorting_keys`` list is used.
    :returns: sorted list of wrappers.
    """
    keys = key_list or settings.wrp_sorting_keys
    result = list(wrps)
    # python sorting is stable: just sort once per key, in reversed order,
    # so the first token ends up dominating
    for token in reversed(list(iterableize(keys))):
        try:
            result = sorted(result, key=operator.attrgetter(token))
        except AttributeError:
            monitor.message('generators.sort',
                            'WARNING Sorting by "%s" failed.' % token)
    return result
def read(filename):
    """Reads wrapper from disk, including root objects."""
    filename = prepare_basename(filename) + '.info'
    with open(filename) as f:
        try:
            info = _read_wrapper_info(f)
        except ValueError as e:
            monitor.message('diskio.read',
                            'ERROR can not read info: ' + filename)
            raise e
    # pull in referenced root objects, if any
    if 'root_filename' in info:
        _read_wrapper_objs(info, dirname(filename))
    klass = getattr(wrappers, info.get('klass'))
    if klass == wrappers.WrapperWrapper:
        # recursively read the contained wrappers
        # (note: 'f' here shadows the closed file handle above)
        p = dirname(filename)
        info['wrps'] = _read_wrapperwrapper(
            join(p, f) for f in info['wrpwrp_names'])
    wrp = klass(**info)
    _clean_wrapper(wrp)
    return wrp
def bulk_write(wrps, name_func, dir_path='', suffices=None, linlog=False): """Writes wrps en block.""" # prepare if use_analysis_cwd: dir_path = os.path.join(analysis.cwd, dir_path) if not suffices: suffices = settings.rootfile_postfixes infofile = os.path.join(dir_path, _infofile) rootfile = os.path.join(dir_path, _rootfile) # todo with(SyncWriteIo()): for all the next statements # make a dict name -> wrps wrps_dict = dict() for w in wrps: name = name_func(w) assert name, 'function "%s" returns %s for "%s"' % (name_func, repr(name), w) if name in wrps_dict: monitor.message( 'sparseio', 'WARNING Overwriting file "%s" from this session in path: %s' % (name, dir_path)) wrps_dict[name] = w # write out info info = dict( (name, w.all_writeable_info()) for name, w in wrps_dict.iteritems()) with open(infofile, 'w') as f_info: cPickle.dump(info, f_info) # write out root file f_root = TFile.Open(rootfile, 'RECREATE') f_root.cd() for name, w in wrps_dict.iteritems(): dirfile = f_root.mkdir(name, name) dirfile.cd() w.obj.Write(name) dirfile.Close() f_root.Close() # write with suffices for suffix in suffices: if suffix == '.root': continue for name, w in wrps_dict.iteritems(): # root will not store filenames with '[]' correctly. fix: alt_name = name.replace('[', '(').replace(']', ')') img_path = os.path.join(dir_path, alt_name) good_path = os.path.join(dir_path, name) if linlog: w.main_pad.SetLogy(0) w.obj.SaveAs(img_path + '_lin' + suffix) generators.switch_log_scale_single_cnv(w, False, True) w.obj.SaveAs(img_path + '_log' + suffix) generators.switch_log_scale_single_cnv(w, False, False) # reset to lin if alt_name != name: os.rename(img_path + '_lin' + suffix, good_path + '_lin' + suffix) os.rename(img_path + '_log' + suffix, good_path + '_log' + suffix) else: w.obj.SaveAs(img_path + suffix) if alt_name != name: os.rename(img_path + suffix, good_path + suffix) return wrps_dict.values()
def bulk_write(wrps, name_func, dir_path='', suffices=None, linlog=False):
    """Writes wrps en block."""
    # prepare
    if use_analysis_cwd:
        dir_path = os.path.join(analysis.cwd, dir_path)
    if not suffices:
        suffices = settings.rootfile_postfixes
    infofile = os.path.join(dir_path, _infofile)
    rootfile = os.path.join(dir_path, _rootfile)

    # todo with(SyncWriteIo()): for all the next statements
    # make a dict name -> wrps
    wrps_dict = dict()
    for w in wrps:
        name = name_func(w)
        assert name, 'function "%s" returns %s for "%s"' % (name_func, repr(name), w)
        if name in wrps_dict:
            # duplicate name: the later wrapper silently wins, warn about it
            monitor.message(
                'sparseio',
                'WARNING Overwriting file "%s" from this session in path: %s'
                % (name, dir_path)
            )
        wrps_dict[name] = w

    # write out info (all wrapper metadata pickled into a single file)
    info = dict((name, w.all_writeable_info())
                for name, w in wrps_dict.iteritems())
    with open(infofile, 'w') as f_info:
        cPickle.dump(info, f_info)

    # write out root file (one subdirectory per wrapper name)
    f_root = TFile.Open(rootfile, 'RECREATE')
    f_root.cd()
    for name, w in wrps_dict.iteritems():
        dirfile = f_root.mkdir(name, name)
        dirfile.cd()
        w.obj.Write(name)
        dirfile.Close()
    f_root.Close()

    # write with suffices (image formats etc.; '.root' was handled above)
    for suffix in suffices:
        if suffix == '.root':
            continue
        for name, w in wrps_dict.iteritems():
            # root will not store filenames with '[]' correctly. fix:
            # save under a bracket-free name, then rename to the real one
            alt_name = name.replace('[', '(').replace(']', ')')
            img_path = os.path.join(dir_path, alt_name)
            good_path = os.path.join(dir_path, name)
            if linlog:
                # save each canvas twice: once linear, once log-scaled
                w.main_pad.SetLogy(0)
                w.obj.SaveAs(img_path+'_lin'+suffix)
                # if the cnv.first_obj has a member called 'GetMaximum', the
                # maximum should be greater than zero...
                if (hasattr(w, 'first_obj')
                        and (not hasattr(w.first_obj, 'GetMaximum')
                             or w.first_obj.GetMaximum() > 1e-9)):
                    # keep the log-scale minimum positive and below the
                    # smallest non-zero bin
                    min_val = w.y_min_gr_0 * 0.5
                    min_val = max(min_val, 1e-9)
                    w.first_obj.SetMinimum(min_val)
                w.main_pad.SetLogy(1)
                w.obj.SaveAs(img_path+'_log'+suffix)
                w.main_pad.SetLogy(0)  # reset to lin
                if alt_name != name:
                    os.rename(img_path+'_lin'+suffix, good_path+'_lin'+suffix)
                    os.rename(img_path+'_log'+suffix, good_path+'_log'+suffix)
            else:
                w.obj.SaveAs(img_path+suffix)
                if alt_name != name:
                    os.rename(img_path+suffix, good_path+suffix)
    return wrps_dict.values()
def main(**main_kwargs): """ Configure varial and run a toolchain. :param main_kwargs: settings parameters given as keyword arguments are added to settings, e.g. ``samples={"mc":MCSample, ...}`` . :param samples: list of sample.Sample instances :param toolchain: root toolchain (see tools.py) """ splash.print_splash() # iPython mode def ipython_warn(): print "WARNING ==================================================" print "WARNING Detected iPython, going to interactive mode... " print "WARNING ==================================================" if ipython_mode: ipython_warn() atexit.register(tear_down) # else: # signal.signal(signal.SIGINT, sig_handler.handle) # setup samples if 'samples' in main_kwargs: samples = main_kwargs.pop('samples') analysis.all_samples = dict((s.name, s) for s in samples) if 'active_samples' in main_kwargs: analysis.active_samples = main_kwargs.pop('active_samples') # setup toolchain global toolchain toolchain = main_kwargs.pop('toolchain') # process kwargs for settings main_kwargs.update(_get_cmd_arg_settings()) main_args.update(main_kwargs) process_settings_kws(main_kwargs) logfile = settings.logfilename() logpath = os.path.split(logfile)[0] if not os.path.exists(logpath): os.mkdir(logpath) monitor.MonitorInfo.outstream = monitor.StdOutTee(logfile) # print settings? if '--settings' in sys.argv: import inspect print "Memberes of the settings module:" for member in dir(settings): if member[0] == '_' or inspect.ismodule(member): continue print " ", member, "=", getattr(settings, member) exit() if not toolchain: monitor.message( 'varial.main', "FATAL No toolchain or eventloops scripts defined." ) return toolchain = tools.ToolChain(None, [toolchain]) # needed for exec toolchain._reuse = settings.try_reuse_results # GO! if settings.can_go_parallel(): monitor.message( 'main.main', 'INFO running with %i parallel workers at max.' 
% settings.max_num_processes ) try: toolchain.run() except RuntimeError as e: if e.args[0] == 'End of reload results mode at: ': monitor.message( 'varial.main', 'WARNING ' + str(e.args) ) else: raise e
def _check_readability(wrp):
    """Warn if the wrapper's info representation cannot be parsed back."""
    try:
        literal_eval(wrp.pretty_writeable_lines().replace('\n', ''))
    except (ValueError, SyntaxError):
        monitor.message('diskio.write',
                        'WARNING Wrapper will not be readable:\n%s' % str(wrp))
def main(**main_kwargs):
    """
    Configure varial and run a toolchain.

    :param main_kwargs: settings parameters given as keyword arguments are
                        added to settings, e.g. ``samples={"mc":MCSample, ...}`` .
    :param samples: list of sample.Sample instances
    :param toolchain: root toolchain (see tools.py)
    """
    splash.print_splash()

    # iPython mode
    def ipython_warn():
        print "WARNING =================================================="
        print "WARNING Detected iPython, going to interactive mode...    "
        print "WARNING =================================================="

    if ipython_mode:
        ipython_warn()
        atexit.register(tear_down)
    # else:
    #     signal.signal(signal.SIGINT, sig_handler.handle)

    # setup samples
    if 'samples' in main_kwargs:
        samples = main_kwargs.pop('samples')
        analysis.all_samples = dict((s.name, s) for s in samples)
    if 'active_samples' in main_kwargs:
        analysis.active_samples = main_kwargs.pop('active_samples')

    # setup toolchain
    global toolchain
    toolchain = main_kwargs.pop('toolchain')

    # process kwargs for settings
    main_kwargs.update(_get_cmd_arg_settings())
    main_args.update(main_kwargs)
    process_settings_kws(main_kwargs)
    logfile = settings.logfilename()
    logpath = os.path.split(logfile)[0]
    # NOTE(review): os.mkdir fails when logpath is nested and the parent
    # does not exist — os.makedirs would be safer; confirm log path depth
    if not os.path.exists(logpath):
        os.mkdir(logpath)
    monitor.MonitorInfo.outstream = monitor.StdOutTee(logfile)

    # print settings?
    if '--settings' in sys.argv:
        import inspect
        print "Memberes of the settings module:"
        for member in dir(settings):
            # NOTE(review): ismodule is called on the name string here, so
            # it is always False — probably meant
            # inspect.ismodule(getattr(settings, member))
            if member[0] == '_' or inspect.ismodule(member):
                continue
            print "  ", member, "=", getattr(settings, member)
        exit()

    if not toolchain:
        monitor.message('varial.main',
                        "FATAL No toolchain or eventloops scripts defined.")
        return
    toolchain = tools.ToolChain(None, [toolchain])  # needed for exec
    toolchain._reuse = settings.try_reuse_results

    # GO!
    if settings.can_go_parallel():
        monitor.message(
            'main.main',
            'INFO running with %i parallel workers at max.'
            % settings.max_num_processes)
    try:
        toolchain.run()
    except RuntimeError as e:
        # the 'reload results' sentinel is an expected early exit, not a crash
        if e.args[0] == 'End of reload results mode at: ':
            monitor.message('varial.main', 'WARNING ' + str(e.args))
        else:
            raise e
def mc_stack_n_data_sum(wrps, merge_mc_key_func=None, use_all_data_lumi=True):
    """
    Stacks MC histos and merges data, input needs to be sorted and grouped.

    Yields tuples of an MC stack, signal histograms, and a data histogram,
    if all kinds of data are present. Raises an exception if no histograms
    are given at all.

    :param wrps: Iterables of HistoWrapper (grouped)
    :param merge_mc_key_func: key function for python sorted(...), default
                              tries to sort after stack position
    :yields: WrapperWrapper of wrappers for plotting
    """
    if not merge_mc_key_func:
        merge_mc_key_func = analysis.get_stack_position
    for grp in wrps:

        # split stream
        dat, bkg, sig = split_data_bkg_sig(grp)

        # data
        dat_sum = None
        try:
            dat_sum = op.sum(dat)
        except op.TooFewWrpsError:
            monitor.message('generators.mc_stack_n_data_sum',
                            'DEBUG No data histograms present!')
        if dat_sum and not use_all_data_lumi:
            data_lumi = op.lumi(dat_sum)
        else:
            data_lumi = analysis.data_lumi_sum_wrp()

        # background (op.merge normalizes to lumi = 1.)
        bkg = sorted(bkg, key=merge_mc_key_func)
        is_2d = bkg and 'TH2' in bkg[0].type
        bkg = group(bkg, merge_mc_key_func)
        bkg = (op.merge(g) for g in bkg)
        bkg = apply_fillcolor(bkg)
        if settings.stack_line_color:
            bkg = apply_linecolor(bkg, settings.stack_line_color)
        if data_lumi.float != 1.:
            # scale merged backgrounds up to the data luminosity
            bkg = gen_prod(itertools.izip(bkg, itertools.repeat(data_lumi)))
        try:
            # 2D histograms cannot be stacked, sum them instead
            if is_2d:
                bkg_stk = gen_squash_sys_acc(bkg, op.sum)
            else:
                bkg_stk = gen_squash_sys_acc(bkg, op.stack)
        except op.TooFewWrpsError:
            bkg_stk = None
            monitor.message('generators.mc_stack_n_data_sum',
                            'DEBUG No background histograms present!')

        # signal
        sig = sorted(sig, key=merge_mc_key_func)
        sig = group(sig, merge_mc_key_func)
        sig = list(op.merge(g) for g in sig)
        if any(s.sys_info for s in sig):
            # squash systematic variations per sample before styling
            sig = sorted(sig, key=lambda s: s.sample)
            sig = group(sig, lambda s: s.sample)
            sig = (gen_squash_sys(s) for s in sig)
        sig = apply_linecolor(sig)
        sig = apply_linewidth(sig)
        sig = list(sig)
        if not sig:
            monitor.message('generators.mc_stack_n_data_sum',
                            'DEBUG No signal histograms present!')

        # return in order for plotting: bkg, signals, data
        res = [bkg_stk] + sig + [dat_sum]
        res = list(r for r in res if r)
        if res:
            yield wrappers.WrapperWrapper(res, name=grp.name)
        else:
            raise op.TooFewWrpsError('No histograms present!')