def draw_workspace(workspace, output_path='.'):
    if not os.path.exists(output_path):
        mkdir_p(output_path)
    data = workspace.data('obsData')
    config = workspace.obj('ModelConfig')
    simpdf = config.pdf
    if isinstance(simpdf, ROOT.RooSimultaneous):
        index_category = simpdf.index_category
        for category in simpdf:
            pdf = simpdf.pdf(category)
            # get first observable
            obs = pdf.observables(config.observables).first()
            # total model histogram
            model_hist = pdf.createHistogram(
                'cat_{0}'.format(category.name), obs)
            # create the data histogram
            data_category = data.reduce('{0}=={1}::{2}'.format(
                index_category.name, index_category.name, category.name))
            data_hist = data_category.createHistogram(
                'hdata_cat_{0}'.format(category.name), obs)
            # get the background components
            comp_hists = []
            components = pdf.components()
            for component in components:
                if 'nominal' not in component.GetName():
                    continue
                comp_hists.append(
                    component.createHistogram(
                        '{0}_{1}'.format(
                            category.name, component.GetName()),
                        obs))
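# Hypothetical usage sketch for draw_workspace (not from the original code): the
# file name and the workspace name 'combined' are assumptions; any ROOT file
# holding a RooWorkspace with an 'obsData' dataset and a 'ModelConfig' will do.
from rootpy.io import root_open

with root_open('workspaces/hh_12_combination_125.root') as infile:
    workspace = infile.Get('combined')
    draw_workspace(workspace, output_path='plots/workspace')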
def save_canvas(canvas, directory, name, formats=None):
    # save images in directories corresponding to current git branch
    filepath = os.path.join(directory, REPO_BRANCH, name)
    path = os.path.dirname(filepath)
    if not os.path.exists(path):
        mkdir_p(path)
    if formats is not None:
        for fmt in formats:
            if fmt[0] != '.':
                fmt = '.' + fmt
            canvas.SaveAs(filepath + fmt)
    else:
        canvas.SaveAs(filepath)
def save_canvas(canvas, directory, name, formats=None):
    # save images in directories corresponding to current git branch
    # filepath = os.path.join(directory, REPO_BRANCH, name)
    filepath = os.path.join(directory, name)
    path = os.path.dirname(filepath)
    if not os.path.exists(path):
        mkdir_p(path)
    if formats is not None:
        if isinstance(formats, basestring):
            formats = formats.split()
        for fmt in formats:
            if fmt[0] != '.':
                fmt = '.' + fmt
            canvas.SaveAs(filepath + fmt)
    else:
        canvas.SaveAs(filepath)
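# Hypothetical usage sketch (not from the original code): the output directory
# and file name are illustrative. This version of save_canvas accepts the
# formats either as a list or as a whitespace-separated string.
canvas = ROOT.TCanvas('c_mass', 'mass', 800, 600)
save_canvas(canvas, 'plots', 'mass_hadhad', formats=['png', 'eps'])
save_canvas(canvas, 'plots', 'mass_hadhad', formats='png eps')  # equivalent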
def run(student, db, datasets, hosts, nproc=1, nice=0,
        output_path='.', setup=None, student_args=None,
        use_qsub=False, qsub_queue='medium', qsub_name_suffix=None,
        dry_run=False, separate_student_output=False,
        warnings_as_errors=False, **kwargs):
    if not kwargs:
        args = ''
    else:
        args = ' '.join([
            '--%s %s' % (key, value)
            for key, value in kwargs.items()
            if value is not None]) + ' '
    if qsub_name_suffix is None:
        qsub_name_suffix = ''
    elif not qsub_name_suffix.startswith('_'):
        qsub_name_suffix = '_' + qsub_name_suffix
    database = Database(db)
    print database
    output_path = os.path.normpath(output_path)
    if separate_student_output and os.path.basename(output_path) != student:
        output_path = os.path.join(
            output_path, os.path.splitext(student)[0])
    if not os.path.exists(output_path):
        if dry_run:
            print "mkdir -p %s" % output_path
        else:
            mkdir_p(output_path)
    python_flags = ''
    if warnings_as_errors:
        python_flags = '-W error'
    CMD = "python %s run --output-path %s -s %s -n %%d --db %s --nice %d %s%%s" % (
        python_flags, output_path, student, db, nice, args)
    if setup is not None:
        CMD = "%s && %s" % (setup, CMD)
    CWD = os.getcwd()
    datasets = datasets[:]
    proc_cmds = []
    while len(datasets) > 0:
        ds = datasets.pop(0)
        output_name = os.path.splitext(student)[0] + '.' + ds
        suffix = kwargs.get('suffix', None)
        if suffix:
            output_name += '_%s' % suffix
        output_name += '.root'
        output_name = os.path.join(output_path, output_name)
        if os.path.exists(output_name):
            print "Output %s already exists. Please delete it and resubmit." % (
                output_name)
            continue
        try:
            files = database[ds].files
        except KeyError:
            print "dataset %s not in database" % ds
            continue
        # determine actual number of required CPU cores
        nproc_actual = min(nproc, len(files))
        cmd = CMD % (nproc_actual, ds)
        if student_args:
            cmd = '%s %s' % (cmd, ' '.join(student_args))
        cmd = "cd %s && %s" % (CWD, cmd)
        if use_qsub:
            # use the batch system
            qsub(cmd,
                 queue=qsub_queue,
                 ppn=nproc_actual,
                 name=os.path.splitext(student)[0] + '.' + ds + qsub_name_suffix,
                 stderr_path=output_path,
                 stdout_path=output_path,
                 dry_run=dry_run)
        else:
            # use simple ssh
            print cmd
            if not dry_run:
                proc_cmds.append(cmd)
    if not use_qsub and not dry_run:
        # use simple ssh with basic load balancing
        hosts = [Host(host) for host in hosts]
        procs = []
        while True:
            active = mp.active_children()
            while len(active) < (2 * len(hosts)) and len(proc_cmds) > 0:
                hosts.sort()
                host = hosts[0]
                cmd = "ssh %s '%s'" % (host.name, proc_cmds.pop(0))
                proc = mp.Process(target=run_helper, args=(cmd,))
                proc.start()
                procs.append(proc)
                host.njobs += 1
                # active_children() joins finished procs
                active = mp.active_children()
            #print time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
            #print "jobs: %i running %i queued"%(len(active),len(train_processes))
            if len(proc_cmds) == 0 and len(active) == 0:
                break
            time.sleep(10)
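# Hypothetical invocation sketch (not from the original code): the student
# module, database name, dataset keys and host names below are illustrative
# placeholders. With dry_run=True the generated commands are only printed;
# nothing is submitted or executed.
run('HHProcessor.py', 'datasets_hh',
    datasets=['data12-JetTauEtmiss', 'PowPyth8_ggH125_tautauhh'],
    hosts=['node01', 'node02'],
    nproc=4,
    nice=10,
    use_qsub=False,
    dry_run=True)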
def write_workspaces(path, prefix, year_mass_category_channel,
                     controls=None, silence=False):
    log.info("writing workspaces ...")
    if controls is None:
        controls = []
    if not os.path.exists(path):
        mkdir_p(path)
    for year, mass_category_channel in year_mass_category_channel.items():
        # write workspaces for each year
        for mass, category_channel in mass_category_channel.items():
            if isinstance(controls, dict):
                if isinstance(controls[year], dict):
                    mass_controls = controls[year][mass].values()
                else:
                    mass_controls = controls[year]
            else:
                mass_controls = controls
            channels = []
            # make workspace for each category
            # include the control region in each
            for category, channel in category_channel.items():
                name = "{0}_{1}_{2}_{3}".format(
                    prefix, year % 1000, category, mass)
                log.info("writing {0} ...".format(name))
                # make workspace
                measurement = histfactory.make_measurement(
                    name, [channel] + mass_controls,
                    POI=POI,
                    const_params=CONST_PARAMS)
                workspace = histfactory.make_workspace(
                    measurement, name=name, silence=silence)
                with root_open(os.path.join(path, '{0}.root'.format(name)),
                               'recreate') as workspace_file:
                    workspace.Write()
                    # mu=1 for Asimov data
                    #measurement.SetParamValue('SigXsecOverSM', 1)
                    histfactory.write_measurement(
                        measurement,
                        root_file=workspace_file,
                        xml_path=os.path.join(path, name),
                        silence=silence)
                channels.append(channel)
            # make combined workspace
            name = "{0}_{1}_combination_{2}".format(prefix, year % 1000, mass)
            log.info("writing {0} ...".format(name))
            measurement = histfactory.make_measurement(
                name, channels + mass_controls,
                POI=POI,
                const_params=CONST_PARAMS)
            workspace = histfactory.make_workspace(
                measurement, name=name, silence=silence)
            with root_open(os.path.join(path, '{0}.root'.format(name)),
                           'recreate') as workspace_file:
                workspace.Write()
                # mu=1 for Asimov data
                #measurement.SetParamValue('SigXsecOverSM', 1)
                histfactory.write_measurement(
                    measurement,
                    root_file=workspace_file,
                    xml_path=os.path.join(path, name),
                    silence=silence)
    # write combined workspaces over all years
    years = year_mass_category_channel.keys()
    if len(years) == 1:
        return
    masses = year_mass_category_channel[years[0]].keys()
    categories = year_mass_category_channel[years[0]][masses[0]].keys()
    for mass in masses:
        if isinstance(controls, dict):
            if isinstance(controls[year], dict):
                mass_controls = [control for year in years
                                 for control in controls[year][mass].values()]
            else:
                mass_controls = [control for year in years
                                 for control in controls[year]]
        else:
            mass_controls = controls
        channels = []
        # make workspace for each category
        # include the control region in each
        # TODO: categories might be different across years
        """
        for category in categories:
            cat_channels = [year_mass_category_channel[year][mass][category]
                            for year in years]
            name = "{0}_full_{1}_{2}".format(
                prefix, category, mass)
            log.info("writing {0} ...".format(name))
            # make workspace
            measurement = histfactory.make_measurement(
                name, cat_channels + mass_controls,
                POI=POI,
                const_params=CONST_PARAMS)
            workspace = histfactory.make_workspace(
                measurement, name=name, silence=silence)
            with root_open(os.path.join(path, '{0}.root'.format(name)),
                           'recreate') as workspace_file:
                workspace.Write()
                # mu=1 for Asimov data
                #measurement.SetParamValue('SigXsecOverSM', 1)
                histfactory.write_measurement(
                    measurement,
                    root_file=workspace_file,
                    xml_path=os.path.join(path, name),
                    silence=silence)
            channels.extend(cat_channels)
        """
        channels = [chan for year in years
                    for chan in year_mass_category_channel[year][mass].values()]
        # make combined workspace
        name = "{0}_full_combination_{1}".format(prefix, mass)
        log.info("writing {0} ...".format(name))
        measurement = histfactory.make_measurement(
            name, channels + mass_controls,
            POI=POI,
            const_params=CONST_PARAMS)
        workspace = histfactory.make_workspace(
            measurement, name=name, silence=silence)
        with root_open(os.path.join(path, '{0}.root'.format(name)),
                       'recreate') as workspace_file:
            workspace.Write()
            # mu=1 for Asimov data
            #measurement.SetParamValue('SigXsecOverSM', 1)
            histfactory.write_measurement(
                measurement,
                root_file=workspace_file,
                xml_path=os.path.join(path, name),
                silence=silence)
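# Hypothetical call sketch (not from the original code): write_workspaces expects
# a nested mapping {year: {mass: {category: channel}}}. The years, masses and
# category names below are illustrative, and the channel objects are assumed to
# be HistFactory channels built elsewhere in the analysis.
year_mass_category_channel = {
    2011: {125: {'vbf': vbf_channel_11, 'boosted': boosted_channel_11}},
    2012: {125: {'vbf': vbf_channel_12, 'boosted': boosted_channel_12}},
}
# writes hh_11_vbf_125.root, hh_11_combination_125.root, ...,
# plus hh_full_combination_125.root for the combination over both years
write_workspaces('workspaces/hh', 'hh', year_mass_category_channel)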
def run(student, db, datasets, hosts, nproc=1, nice=0,
        output_path='.', setup=None, student_args=None,
        use_qsub=False, qsub_queue='medium', qsub_name_suffix=None,
        dry_run=False, separate_student_output=False,
        warnings_as_errors=False, **kwargs):
    if not kwargs:
        args = ''
    else:
        args = ' '.join([
            '--%s %s' % (key, value)
            for key, value in kwargs.items()
            if value is not None]) + ' '
    if qsub_name_suffix is None:
        qsub_name_suffix = ''
    elif not qsub_name_suffix.startswith('_'):
        qsub_name_suffix = '_' + qsub_name_suffix
    database = Database(db)
    output_path = os.path.normpath(output_path)
    if separate_student_output and os.path.basename(output_path) != student:
        output_path = os.path.join(
            output_path, os.path.splitext(student)[0])
    if not os.path.exists(output_path):
        if dry_run:
            print "mkdir -p %s" % output_path
        else:
            mkdir_p(output_path)
    python_flags = ''
    if warnings_as_errors:
        python_flags = '-W error'
    CMD = "python %s run --output-path %s -s %s -n %%d --db %s --nice %d %s%%s" % (
        python_flags, output_path, student, db, nice, args)
    if setup is not None:
        CMD = "%s && %s" % (setup, CMD)
    CWD = os.getcwd()
    datasets = datasets[:]
    proc_cmds = []
    while len(datasets) > 0:
        ds = datasets.pop(0)
        output_name = os.path.splitext(student)[0] + '.' + ds
        suffix = kwargs.get('suffix', None)
        if suffix:
            output_name += '_%s' % suffix
        output_name += '.root'
        output_name = os.path.join(output_path, output_name)
        if os.path.exists(output_name):
            print "Output %s already exists. Please delete it and resubmit." % (
                output_name)
            continue
        try:
            files = database[ds].files
        except KeyError:
            print "dataset %s not in database" % ds
            continue
        # determine actual number of required CPU cores
        nproc_actual = min(nproc, len(files))
        cmd = CMD % (nproc_actual, ds)
        if student_args:
            cmd = '%s %s' % (cmd, ' '.join(student_args))
        cmd = "cd %s && %s" % (CWD, cmd)
        if use_qsub:
            # use the batch system
            qsub(cmd,
                 queue=qsub_queue,
                 ppn=nproc_actual,
                 name=os.path.splitext(student)[0] + '.' + ds + qsub_name_suffix,
                 stderr_path=output_path,
                 stdout_path=output_path,
                 dry_run=dry_run)
        else:
            # use simple ssh
            print cmd
            if not dry_run:
                proc_cmds.append(cmd)
    if not use_qsub and not dry_run:
        # use simple ssh with basic load balancing
        hosts = [Host(host) for host in hosts]
        procs = []
        while True:
            active = mp.active_children()
            while len(active) < (2 * len(hosts)) and len(proc_cmds) > 0:
                hosts.sort()
                host = hosts[0]
                cmd = "ssh %s '%s'" % (host.name, proc_cmds.pop(0))
                proc = mp.Process(target=run_helper, args=(cmd,))
                proc.start()
                procs.append(proc)
                host.njobs += 1
                # active_children() joins finished procs
                active = mp.active_children()
            #print time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
            #print "jobs: %i running %i queued"%(len(active),len(train_processes))
            if len(proc_cmds) == 0 and len(active) == 0:
                break
            time.sleep(10)
def plots_dir(script):
    script = os.path.basename(script)
    script = os.path.splitext(script)[0]
    dir = os.path.join(PLOTS_DIR, script)
    mkdir_p(dir)
    return dir
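# Hypothetical usage sketch (not from the original code): plots_dir() maps the
# calling script to a per-script subdirectory of PLOTS_DIR, creating it if
# needed; the canvas and file name below are illustrative.
canvas = ROOT.TCanvas('c_mass', '', 800, 600)
out_dir = plots_dir(__file__)  # e.g. <PLOTS_DIR>/plot_mass for plot_mass.py
save_canvas(canvas, out_dir, 'mass_hadhad', formats=['png', 'eps'])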
def write_workspaces(path, prefix, year_mass_category_channel,
                     controls=None, shapeControls=None, silence=False):
    log.info("writing workspaces ... for {0}".format(
        str(year_mass_category_channel)))
    if controls is None:
        controls = []
    if shapeControls is None:
        shapeControls = []
    if not os.path.exists(path):
        mkdir_p(path)
    for year, mass_category_channel in year_mass_category_channel.items():
        # write workspaces for each year
        for mass, category_channel in mass_category_channel.items():
            if isinstance(controls, dict):
                log.info("controls is a dict")
                if isinstance(controls[year], dict):
                    log.info("controls are keyed by year and mass")
                    mass_controls = controls[year][mass].values()
                else:
                    log.info("controls are keyed by year")
                    mass_controls = controls[year]
            else:
                mass_controls = controls
            if isinstance(shapeControls, dict):
                log.info("shapeControls is a dict")
                if isinstance(shapeControls[year], dict):
                    log.info("shapeControls are keyed by year and mass")
                    shape_controls = shapeControls[year][mass].values()
                else:
                    log.info("shapeControls are keyed by year")
                    shape_controls = shapeControls[year]
            else:
                shape_controls = shapeControls
            log.info("comparing {0} with {1}".format(
                str(mass_controls), str(shape_controls)))
            channels = []
            # make workspace for each category
            # include the control region in each
            nCategories = 0
            for category, channel in category_channel.items():
                nCategories += 1
                name = "{0}_{1}_{2}_{3}".format(
                    prefix, year % 1000, category, mass)
                log.info("writing {0} ...".format(name))
                if mass < 0.:
                    parity = 'm'
                else:
                    parity = 'p'
                newname = "AllSys_cp_{}_0_{:02.0f}_{}".format(
                    parity, abs(mass * 100), prefix)
                # print newname
                measurement = histfactory.make_measurement(
                    newname, [channel] + mass_controls + shape_controls,
                    POI=POI,
                    const_params=CONST_PARAMS)
                workspace = histfactory.make_workspace(
                    measurement, name=newname, silence=silence)
                with root_open(os.path.join(path, '{0}.root'.format(newname)),
                               'recreate') as workspace_file:
                    workspace.Write()
                    # mu=1 for Asimov data
                    # measurement.SetParamValue('ATLAS_epsilon', 1)
                    histfactory.write_measurement(
                        measurement,
                        root_file=workspace_file,
                        xml_path=os.path.join(path, newname),
                        silence=silence)
                channels.append(channel)
            log.info("length of channels is {0}".format(str(len(channels))))
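# Worked example of the naming scheme above (illustrative values, not from the
# source): with prefix='hh' and mass=-0.4, parity is 'm' and abs(mass * 100)
# formats to '40', so the per-category workspace file is 'AllSys_cp_m_0_40_hh.root'.
print "AllSys_cp_{}_0_{:02.0f}_{}".format('m', abs(-0.4 * 100), 'hh')
# -> AllSys_cp_m_0_40_hh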
def write_workspaces(path, prefix, year_mass_category_channel,
                     controls=None, silence=False):
    log.info("writing workspaces ...")
    if controls is None:
        controls = []
    if not os.path.exists(path):
        mkdir_p(path)
    for year, mass_category_channel in year_mass_category_channel.items():
        # write workspaces for each year
        for mass, category_channel in mass_category_channel.items():
            if isinstance(controls, dict):
                if isinstance(controls[year], dict):
                    mass_controls = controls[year][mass].values()
                else:
                    mass_controls = controls[year]
            else:
                mass_controls = controls
            channels = []
            # make workspace for each category
            # include the control region in each
            for category, channel in category_channel.items():
                name = "{0}_{1}_{2}_{3}".format(
                    prefix, year % 1000, category, mass)
                log.info("writing {0} ...".format(name))
                # make workspace
                measurement = histfactory.make_measurement(
                    name, [channel] + mass_controls,
                    POI=POI,
                    const_params=CONST_PARAMS)
                workspace = histfactory.make_workspace(
                    measurement, name=name, silence=silence)
                with root_open(os.path.join(path, '{0}.root'.format(name)),
                               'recreate') as workspace_file:
                    workspace.Write()
                    # mu=1 for Asimov data
                    #measurement.SetParamValue('SigXsecOverSM', 1)
                    histfactory.write_measurement(
                        measurement,
                        root_file=workspace_file,
                        xml_path=os.path.join(path, name),
                        silence=silence)
                channels.append(channel)
            # make combined workspace
            name = "{0}_{1}_combination_{2}".format(prefix, year % 1000, mass)
            log.info("writing {0} ...".format(name))
            measurement = histfactory.make_measurement(
                name, channels + mass_controls,
                POI=POI,
                const_params=CONST_PARAMS)
            workspace = histfactory.make_workspace(
                measurement, name=name, silence=silence)
            with root_open(os.path.join(path, '{0}.root'.format(name)),
                           'recreate') as workspace_file:
                workspace.Write()
                # mu=1 for Asimov data
                #measurement.SetParamValue('SigXsecOverSM', 1)
                histfactory.write_measurement(
                    measurement,
                    root_file=workspace_file,
                    xml_path=os.path.join(path, name),
                    silence=silence)
    # write combined workspaces over all years
    years = year_mass_category_channel.keys()
    if len(years) == 1:
        return
    masses = year_mass_category_channel[years[0]].keys()
    categories = year_mass_category_channel[years[0]][masses[0]].keys()
    for mass in masses:
        if isinstance(controls, dict):
            if isinstance(controls[year], dict):
                mass_controls = [control for year in years
                                 for control in controls[year][mass].values()]
            else:
                mass_controls = [control for year in years
                                 for control in controls[year]]
        else:
            mass_controls = controls
        channels = []
        # make workspace for each category
        # include the control region in each
        # TODO: categories might be different across years
        """
        for category in categories:
            cat_channels = [year_mass_category_channel[year][mass][category]
                            for year in years]
            name = "{0}_full_{1}_{2}".format(
                prefix, category, mass)
            log.info("writing {0} ...".format(name))
            # make workspace
            measurement = histfactory.make_measurement(
                name, cat_channels + mass_controls,
                POI=POI,
                const_params=CONST_PARAMS)
            workspace = histfactory.make_workspace(
                measurement, name=name, silence=silence)
            with root_open(os.path.join(path, '{0}.root'.format(name)),
                           'recreate') as workspace_file:
                workspace.Write()
                # mu=1 for Asimov data
                #measurement.SetParamValue('SigXsecOverSM', 1)
                histfactory.write_measurement(
                    measurement,
                    root_file=workspace_file,
                    xml_path=os.path.join(path, name),
                    silence=silence)
            channels.extend(cat_channels)
        """
        channels = [chan for year in years
                    for chan in year_mass_category_channel[year][mass].values()]
        # make combined workspace
        name = "{0}_full_combination_{1}".format(prefix, mass)
        log.info("writing {0} ...".format(name))
        measurement = histfactory.make_measurement(
            name, channels + mass_controls,
            POI=POI,
            const_params=CONST_PARAMS)
        workspace = histfactory.make_workspace(
            measurement, name=name, silence=silence)
        with root_open(os.path.join(path, '{0}.root'.format(name)),
                       'recreate') as workspace_file:
            workspace.Write()
            # mu=1 for Asimov data
            #measurement.SetParamValue('SigXsecOverSM', 1)
            histfactory.write_measurement(
                measurement,
                root_file=workspace_file,
                xml_path=os.path.join(path, name),
                silence=silence)