Exemple #1
0
def draw_workspace(workspace, output_path='.'):
    """Create model, data and component histograms for each category.

    Reads the ``obsData`` dataset and the ``ModelConfig`` object from
    *workspace*. When the configured pdf is a ``ROOT.RooSimultaneous``, each
    category gets a histogram of its pdf, a histogram of the data reduced to
    that category, and one histogram per pdf component whose name contains
    ``nominal``. All of them are made over the first observable of the
    category pdf.

    *output_path* is passed to ``mkdir_p`` when it does not exist yet. The
    histograms are built here but are neither returned nor written out by
    this function.
    """
    if not os.path.exists(output_path):
        mkdir_p(output_path)
    observed = workspace.data('obsData')
    model_config = workspace.obj('ModelConfig')
    top_pdf = model_config.pdf
    if not isinstance(top_pdf, ROOT.RooSimultaneous):
        # only a simultaneous pdf is split into categories
        return
    split_by = top_pdf.index_category
    for cat in top_pdf:
        cat_pdf = top_pdf.pdf(cat)
        # first observable of this category's pdf
        observable = cat_pdf.observables(model_config.observables).first()
        # histogram of the total model
        total_hist = cat_pdf.createHistogram(
            'cat_{0}'.format(cat.name), observable)
        # histogram of the data restricted to this category
        selection = '{0}=={1}::{2}'.format(
            split_by.name, split_by.name, cat.name)
        observed_hist = observed.reduce(selection).createHistogram(
            'hdata_cat_{0}'.format(cat.name), observable)
        # one histogram per component whose name contains 'nominal'
        nominal_hists = [
            part.createHistogram(
                '{0}_{1}'.format(cat.name, part.GetName()), observable)
            for part in cat_pdf.components()
            if 'nominal' in part.GetName()
        ]
Exemple #2
0
def save_canvas(canvas, directory, name, formats=None):
    """Save *canvas* as ``directory/REPO_BRANCH/name`` in the given formats.

    Parameters
    ----------
    canvas : object
        Anything exposing ``SaveAs(path)``.
    directory : str
        Base output directory.
    name : str
        File name (may contain sub-directories). When *formats* is given it
        is used as the stem and each format is appended as the extension.
    formats : None, str or iterable of str
        ``None`` saves once to the path exactly as given. A single string is
        split on whitespace (``'png pdf'``); an iterable is used as is. A
        leading dot is added to each format when missing, and empty entries
        are skipped.
    """
    # ``basestring`` only exists on Python 2; use ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # save images in directories corresponding to current git branch
    filepath = os.path.join(directory, REPO_BRANCH, name)
    path = os.path.dirname(filepath)
    # dirname is '' when the target lives in the current directory: there is
    # nothing to create in that case.
    if path and not os.path.exists(path):
        mkdir_p(path)

    if formats is None:
        canvas.SaveAs(filepath)
        return

    if isinstance(formats, string_types):
        # without this a plain 'png' would be iterated character by character
        formats = formats.split()
    for fmt in formats:
        if not fmt:
            continue
        if not fmt.startswith('.'):
            fmt = '.' + fmt
        canvas.SaveAs(filepath + fmt)
Exemple #3
0
def save_canvas(canvas, directory, name, formats=None):
    """Save *canvas* as ``directory/name`` in the given formats.

    Parameters
    ----------
    canvas : object
        Anything exposing ``SaveAs(path)``.
    directory : str
        Output directory; may be empty to save relative to the current
        directory.
    name : str
        File name (may contain sub-directories). When *formats* is given it
        is used as the stem and each format is appended as the extension.
    formats : None, str or iterable of str
        ``None`` saves once to the path exactly as given. A single string is
        split on whitespace (``'png pdf'``); an iterable is used as is. A
        leading dot is added to each format when missing, and empty entries
        are skipped.
    """
    # ``basestring`` only exists on Python 2; use ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    filepath = os.path.join(directory, name)
    path = os.path.dirname(filepath)
    # dirname is '' when the target lives in the current directory: there is
    # nothing to create in that case.
    if path and not os.path.exists(path):
        mkdir_p(path)

    if formats is None:
        canvas.SaveAs(filepath)
        return

    if isinstance(formats, string_types):
        formats = formats.split()
    for fmt in formats:
        if not fmt:
            continue
        if not fmt.startswith('.'):
            fmt = '.' + fmt
        canvas.SaveAs(filepath + fmt)
Exemple #4
0
def run(student,
        db,
        datasets,
        hosts,
        nproc=1,
        nice=0,
        output_path='.',
        setup=None,
        student_args=None,
        use_qsub=False,
        qsub_queue='medium',
        qsub_name_suffix=None,
        dry_run=False,
        separate_student_output=False,
        warnings_as_errors=False,
        **kwargs):
    """Launch one ``python run`` job per dataset, through qsub or over ssh.

    Parameters
    ----------
    student : str
        Script name passed to ``-s``; its name without extension prefixes the
        expected output files and the qsub job names.
    db : str
        Database name, given to ``Database`` and forwarded as ``--db``.
    datasets : iterable of str
        Dataset names. A dataset missing from the database, or whose output
        file already exists, is reported and skipped.
    hosts : list
        Host names used in ssh mode (each is wrapped in ``Host``).
    nproc : int
        Upper bound on processes per job; the value actually used is capped
        at the number of files in the dataset.
    nice : int
        Forwarded as ``--nice``.
    output_path : str
        Output directory, created with ``mkdir_p`` when missing.
    setup : str or None
        Shell command run before the job (joined with ``&&``).
    student_args : list of str or None
        Extra arguments appended to every command.
    use_qsub : bool
        Submit with ``qsub`` instead of ssh.
    qsub_queue : str
        Queue name given to ``qsub``.
    qsub_name_suffix : str or None
        Appended to the job name; a leading ``_`` is added when missing.
    dry_run : bool
        Print what would be done instead of creating directories or starting
        ssh jobs; also forwarded to ``qsub``.
    separate_student_output : bool
        Write into a sub-directory named after the student.
    warnings_as_errors : bool
        Run python with ``-W error``.
    **kwargs
        Every entry whose value is not ``None`` becomes ``--key value`` on the
        command line. ``suffix`` is additionally used to build the expected
        output file name.

    Raises
    ------
    ValueError
        In ssh mode, when there are jobs to run but *hosts* is empty (the
        scheduling loop below could otherwise never make progress).
    """
    if not kwargs:
        args = ''
    else:
        args = ' '.join([
            '--%s %s' % (key, value)
            for key, value in kwargs.items() if value is not None
        ]) + ' '

    if qsub_name_suffix is None:
        qsub_name_suffix = ''
    elif not qsub_name_suffix.startswith('_'):
        qsub_name_suffix = '_' + qsub_name_suffix

    database = Database(db)
    # single-argument print(...) behaves identically on Python 2 and 3
    print(database)

    # name without extension; str.strip('.py') must not be used for this, it
    # strips the *characters* '.', 'p', 'y' from both ends of the name.
    student_name = os.path.splitext(student)[0]

    output_path = os.path.normpath(output_path)
    # NOTE(review): the basename is compared with `student` including its
    # extension while the directory is named without it -- confirm intent.
    if separate_student_output and os.path.basename(output_path) != student:
        output_path = os.path.join(output_path, student_name)
    if not os.path.exists(output_path):
        if dry_run:
            print("mkdir -p %s" % output_path)
        else:
            mkdir_p(output_path)

    python_flags = ''
    if warnings_as_errors:
        python_flags = '-W error'

    # %%d and %%s are left as placeholders for nproc and the dataset name
    CMD = "python %s run --output-path %s -s %s -n %%d --db %s --nice %d %s%%s" % (
        python_flags, output_path, student, db, nice, args)
    if setup is not None:
        CMD = "%s && %s" % (setup, CMD)
    CWD = os.getcwd()

    suffix = kwargs.get('suffix', None)

    proc_cmds = []
    for ds in datasets:
        output_name = student_name + '.' + ds
        if suffix:
            output_name += '_%s' % suffix
        output_name += '.root'
        output_name = os.path.join(output_path, output_name)
        if os.path.exists(output_name):
            print("Output %s already exists. Please delete it and resubmit." % (
                output_name))
            continue

        try:
            files = database[ds].files
        except KeyError:
            print("dataset %s not in database" % ds)
            continue

        # determine actual number of required CPU cores
        nproc_actual = min(nproc, len(files))
        cmd = CMD % (nproc_actual, ds)
        if student_args:
            cmd = '%s %s' % (cmd, ' '.join(student_args))
        cmd = "cd %s && %s" % (CWD, cmd)

        if use_qsub:  # use the batch system
            qsub(cmd,
                 queue=qsub_queue,
                 ppn=nproc_actual,
                 name=student_name + '.' + ds + qsub_name_suffix,
                 stderr_path=output_path,
                 stdout_path=output_path,
                 dry_run=dry_run)

        else:  # use simple ssh
            print(cmd)
            if not dry_run:
                proc_cmds.append(cmd)

    if not use_qsub and not dry_run:
        if proc_cmds and not hosts:
            raise ValueError("no hosts given to run the queued jobs on")
        # use simple ssh with basic load balancing
        hosts = [Host(host) for host in hosts]
        # keep a handle on every spawned process
        procs = []
        while True:
            active = mp.active_children()
            # allow at most two concurrent jobs per host
            while len(active) < (2 * len(hosts)) and proc_cmds:
                # presumably Host instances order by load so that hosts[0] is
                # the least busy one -- verify against the Host class
                hosts.sort()
                host = hosts[0]
                cmd = "ssh %s '%s'" % (host.name, proc_cmds.pop(0))
                proc = mp.Process(target=run_helper, args=(cmd, ))
                proc.start()
                procs.append(proc)
                host.njobs += 1
                # active_children() joins finished procs
                active = mp.active_children()
            if not proc_cmds and not active:
                break
            time.sleep(10)
Exemple #5
0
def _select_controls(controls, year, mass):
    """Return the control channels for one (year, mass) point as a new list."""
    if not isinstance(controls, dict):
        return list(controls)
    year_controls = controls[year]
    if isinstance(year_controls, dict):
        return list(year_controls[mass].values())
    return list(year_controls)


def _write_workspace(path, name, channels, silence):
    """Build the measurement and workspace for *channels* and write them under *path*."""
    log.info("writing {0} ...".format(name))
    measurement = histfactory.make_measurement(
        name, channels,
        POI=POI,
        const_params=CONST_PARAMS)
    workspace = histfactory.make_workspace(measurement, name=name,
                                           silence=silence)
    with root_open(os.path.join(path, '{0}.root'.format(name)),
                   'recreate') as workspace_file:
        workspace.Write()
        # mu=1 for Asimov data
        #measurement.SetParamValue('SigXsecOverSM', 1)
        histfactory.write_measurement(measurement,
            root_file=workspace_file,
            xml_path=os.path.join(path, name),
            silence=silence)


def write_workspaces(path, prefix, year_mass_category_channel,
                     controls=None,
                     silence=False):
    """Write per-category, per-year and all-year combined workspaces.

    Parameters
    ----------
    path : str
        Output directory, created with ``mkdir_p`` when missing.
    prefix : str
        Prefix of every workspace name.
    year_mass_category_channel : dict
        Nested mapping ``{year: {mass: {category: channel}}}``. ``year`` is
        used as ``year % 1000`` in the names.
    controls : None, list or dict
        Control channels added to every measurement. Either a flat list, a
        ``{year: list}`` mapping or a ``{year: {mass: {key: channel}}}``
        mapping. ``None`` means no controls.
    silence : bool
        Forwarded to ``histfactory.make_workspace`` and
        ``histfactory.write_measurement``.

    For every (year, mass) one workspace is written per category plus one
    ``combination`` over all categories. When more than one year is present a
    ``full_combination`` over all years is written for every mass of the
    first year. Nothing is written for the cross-year part when the input
    holds zero or one year.
    """
    log.info("writing workspaces ...")
    if controls is None:
        controls = []
    if not os.path.exists(path):
        mkdir_p(path)
    for year, mass_category_channel in year_mass_category_channel.items():
        # write workspaces for each year
        for mass, category_channel in mass_category_channel.items():
            mass_controls = _select_controls(controls, year, mass)
            channels = []
            # make workspace for each category
            # include the control region in each
            for category, channel in category_channel.items():
                name = "{0}_{1}_{2}_{3}".format(
                    prefix, year % 1000, category, mass)
                _write_workspace(
                    path, name, [channel] + mass_controls, silence)
                channels.append(channel)
            # make combined workspace
            name = "{0}_{1}_combination_{2}".format(prefix, year % 1000, mass)
            _write_workspace(path, name, channels + mass_controls, silence)

    # write combined workspaces over all years
    years = list(year_mass_category_channel.keys())
    if len(years) <= 1:
        return
    masses = list(year_mass_category_channel[years[0]].keys())
    for mass in masses:
        if isinstance(controls, dict):
            # gather the controls of every year explicitly instead of relying
            # on a loop variable left over from an earlier loop
            mass_controls = [control for yr in years
                             for control in _select_controls(controls, yr, mass)]
        else:
            mass_controls = list(controls)
        # TODO: per-category workspaces across years are not written;
        # categories might be different across years
        channels = [chan for yr in years
                    for chan in year_mass_category_channel[yr][mass].values()]
        # make combined workspace
        name = "{0}_full_combination_{1}".format(prefix, mass)
        _write_workspace(path, name, channels + mass_controls, silence)
Exemple #6
0
def run(student,
        db,
        datasets,
        hosts,
        nproc=1,
        nice=0,
        output_path='.',
        setup=None,
        student_args=None,
        use_qsub=False,
        qsub_queue='medium',
        qsub_name_suffix=None,
        dry_run=False,
        separate_student_output=False,
        warnings_as_errors=False,
        **kwargs):
    """Launch one ``python run`` job per dataset, through qsub or over ssh.

    Parameters
    ----------
    student : str
        Script name passed to ``-s``; its name without extension prefixes the
        expected output files and the qsub job names.
    db : str
        Database name, given to ``Database`` and forwarded as ``--db``.
    datasets : iterable of str
        Dataset names. A dataset missing from the database, or whose output
        file already exists, is reported and skipped.
    hosts : list
        Host names used in ssh mode (each is wrapped in ``Host``).
    nproc : int
        Upper bound on processes per job; the value actually used is capped
        at the number of files in the dataset.
    nice : int
        Forwarded as ``--nice``.
    output_path : str
        Output directory, created with ``mkdir_p`` when missing.
    setup : str or None
        Shell command run before the job (joined with ``&&``).
    student_args : list of str or None
        Extra arguments appended to every command.
    use_qsub : bool
        Submit with ``qsub`` instead of ssh.
    qsub_queue : str
        Queue name given to ``qsub``.
    qsub_name_suffix : str or None
        Appended to the job name; a leading ``_`` is added when missing.
    dry_run : bool
        Print what would be done instead of creating directories or starting
        ssh jobs; also forwarded to ``qsub``.
    separate_student_output : bool
        Write into a sub-directory named after the student.
    warnings_as_errors : bool
        Run python with ``-W error``.
    **kwargs
        Every entry whose value is not ``None`` becomes ``--key value`` on the
        command line. ``suffix`` is additionally used to build the expected
        output file name.

    Raises
    ------
    ValueError
        In ssh mode, when there are jobs to run but *hosts* is empty (the
        scheduling loop below could otherwise never make progress).
    """
    if not kwargs:
        args = ''
    else:
        args = ' '.join(['--%s %s' % (key, value)
            for key, value in kwargs.items() if value is not None]) + ' '

    if qsub_name_suffix is None:
        qsub_name_suffix = ''
    elif not qsub_name_suffix.startswith('_'):
        qsub_name_suffix = '_' + qsub_name_suffix

    database = Database(db)

    # name without extension; str.strip('.py') must not be used for this, it
    # strips the *characters* '.', 'p', 'y' from both ends of the name.
    student_name = os.path.splitext(student)[0]

    output_path = os.path.normpath(output_path)
    # NOTE(review): the basename is compared with `student` including its
    # extension while the directory is named without it -- confirm intent.
    if separate_student_output and os.path.basename(output_path) != student:
        output_path = os.path.join(output_path, student_name)
    if not os.path.exists(output_path):
        if dry_run:
            # single-argument print(...) behaves identically on Python 2 and 3
            print("mkdir -p %s" % output_path)
        else:
            mkdir_p(output_path)

    python_flags = ''
    if warnings_as_errors:
        python_flags = '-W error'

    # %%d and %%s are left as placeholders for nproc and the dataset name
    CMD = "python %s run --output-path %s -s %s -n %%d --db %s --nice %d %s%%s" % (
           python_flags, output_path, student, db, nice, args)
    if setup is not None:
        CMD = "%s && %s" % (setup, CMD)
    CWD = os.getcwd()

    suffix = kwargs.get('suffix', None)

    proc_cmds = []
    for ds in datasets:
        output_name = student_name + '.' + ds
        if suffix:
            output_name += '_%s' % suffix
        output_name += '.root'
        output_name = os.path.join(output_path, output_name)
        if os.path.exists(output_name):
            print("Output %s already exists. Please delete it and resubmit." % (
                output_name))
            continue

        try:
            files = database[ds].files
        except KeyError:
            print("dataset %s not in database" % ds)
            continue

        # determine actual number of required CPU cores
        nproc_actual = min(nproc, len(files))
        cmd = CMD % (nproc_actual, ds)
        if student_args:
            cmd = '%s %s' % (cmd, ' '.join(student_args))
        cmd = "cd %s && %s" % (CWD, cmd)

        if use_qsub: # use the batch system
            qsub(cmd,
                 queue=qsub_queue,
                 ppn=nproc_actual,
                 name=student_name + '.' + ds + qsub_name_suffix,
                 stderr_path=output_path,
                 stdout_path=output_path,
                 dry_run=dry_run)

        else: # use simple ssh
            print(cmd)
            if not dry_run:
                proc_cmds.append(cmd)

    if not use_qsub and not dry_run:
        if proc_cmds and not hosts:
            raise ValueError("no hosts given to run the queued jobs on")
        # use simple ssh with basic load balancing
        hosts = [Host(host) for host in hosts]
        # keep a handle on every spawned process
        procs = []
        while True:
            active = mp.active_children()
            # allow at most two concurrent jobs per host
            while len(active) < (2 * len(hosts)) and proc_cmds:
                # presumably Host instances order by load so that hosts[0] is
                # the least busy one -- verify against the Host class
                hosts.sort()
                host = hosts[0]
                cmd = "ssh %s '%s'" % (host.name, proc_cmds.pop(0))
                proc = mp.Process(target=run_helper, args=(cmd,))
                proc.start()
                procs.append(proc)
                host.njobs += 1
                # active_children() joins finished procs
                active = mp.active_children()
            if not proc_cmds and not active:
                break
            time.sleep(10)
Exemple #7
0
def plots_dir(script):
    """Return the plot directory for *script* after calling ``mkdir_p`` on it.

    The directory is ``PLOTS_DIR`` joined with the base name of *script*
    stripped of its extension.
    """
    stem, _extension = os.path.splitext(os.path.basename(script))
    target = os.path.join(PLOTS_DIR, stem)
    mkdir_p(target)
    return target
Exemple #8
0
def write_workspaces(path, prefix, year_mass_category_channel,
                     controls=None, shapeControls=None,
                     silence=False):
    """Write one workspace per category for every (year, mass) point.

    Parameters
    ----------
    path : str
        Output directory, created with ``mkdir_p`` when missing.
    prefix : str
        Used in the logged name and as the last part of the file name.
    year_mass_category_channel : dict
        Nested mapping ``{year: {mass: {category: channel}}}``. ``mass`` must
        be numeric: its sign and ``abs(mass * 100)`` go into the file name.
    controls, shapeControls : None, list or dict
        Extra channels added to every measurement. Each is either a flat
        list, a ``{year: list}`` mapping or a
        ``{year: {mass: {key: channel}}}`` mapping. ``None`` means none.
    silence : bool
        Forwarded to ``histfactory.make_workspace`` and
        ``histfactory.write_measurement``.
    """
    log.info("writing workspaces ... for {0}".format(str(year_mass_category_channel)))
    if controls is None:
        controls = []
    if shapeControls is None:
        shapeControls = []

    if not os.path.exists(path):
        mkdir_p(path)
    for year, mass_category_channel in year_mass_category_channel.items():
        # write workspaces for each year
        for mass, category_channel in mass_category_channel.items():
            if isinstance(controls, dict):
                log.info("controls is a dict keyed by year")
                if isinstance(controls[year], dict):
                    log.info("controls[year] is a dict keyed by mass")
                    mass_controls = list(controls[year][mass].values())
                else:
                    log.info("controls[year] is not a dict, using it as is")
                    mass_controls = list(controls[year])
            else:
                mass_controls = list(controls)
            if isinstance(shapeControls, dict):
                log.info("shapeControls is a dict keyed by year")
                if isinstance(shapeControls[year], dict):
                    log.info("shapeControls[year] is a dict keyed by mass")
                    shape_controls = list(shapeControls[year][mass].values())
                else:
                    log.info("shapeControls[year] is not a dict, using it as is")
                    shape_controls = list(shapeControls[year])
            else:
                shape_controls = list(shapeControls)

            log.info("comparing {0} with {1}".format(str(mass_controls),str(shape_controls)))
            channels = []
            # 'm' marks a negative mass value, 'p' a non-negative one
            if mass < 0.:
                parity = 'm'
            else:
                parity = 'p'
            # NOTE(review): this name depends on neither year nor category, so
            # every category (and year) of a given mass writes to the same
            # file -- confirm that this overwrite is intended.
            newname = "AllSys_cp_{}_0_{:02.0f}_{}".format(parity, abs(mass*100), prefix)
            # make workspace for each category
            # include the control regions in each
            for category, channel in category_channel.items():
                name = "{0}_{1}_{2}_{3}".format(
                    prefix, year % 1000, category, mass)
                log.info("writing {0} ...".format(name))

                measurement = histfactory.make_measurement(
                    newname, [channel] + mass_controls + shape_controls,
                    POI=POI,
                    const_params=CONST_PARAMS)
                workspace = histfactory.make_workspace(measurement, name=newname,
                                                       silence=silence)
                with root_open(os.path.join(path, '{0}.root'.format(newname)),
                               'recreate') as workspace_file:
                    workspace.Write()
                    # mu=1 for Asimov data
                    # measurement.SetParamValue('ATLAS_epsilon', 1)
                    histfactory.write_measurement(measurement,
                        root_file=workspace_file,
                        xml_path=os.path.join(path, newname),
                        silence=silence)
                channels.append(channel)
            log.info("length of channels is {0}".format(str(len(channels))))
Exemple #9
0
def _select_controls(controls, year, mass):
    """Return the control channels for one (year, mass) point as a new list."""
    if not isinstance(controls, dict):
        return list(controls)
    year_controls = controls[year]
    if isinstance(year_controls, dict):
        return list(year_controls[mass].values())
    return list(year_controls)


def _write_workspace(path, name, channels, silence):
    """Build the measurement and workspace for *channels* and write them under *path*."""
    log.info("writing {0} ...".format(name))
    measurement = histfactory.make_measurement(
        name,
        channels,
        POI=POI,
        const_params=CONST_PARAMS)
    workspace = histfactory.make_workspace(measurement,
                                           name=name,
                                           silence=silence)
    with root_open(os.path.join(path, '{0}.root'.format(name)),
                   'recreate') as workspace_file:
        workspace.Write()
        # mu=1 for Asimov data
        #measurement.SetParamValue('SigXsecOverSM', 1)
        histfactory.write_measurement(measurement,
                                      root_file=workspace_file,
                                      xml_path=os.path.join(path, name),
                                      silence=silence)


def write_workspaces(path,
                     prefix,
                     year_mass_category_channel,
                     controls=None,
                     silence=False):
    """Write per-category, per-year and all-year combined workspaces.

    Parameters
    ----------
    path : str
        Output directory, created with ``mkdir_p`` when missing.
    prefix : str
        Prefix of every workspace name.
    year_mass_category_channel : dict
        Nested mapping ``{year: {mass: {category: channel}}}``. ``year`` is
        used as ``year % 1000`` in the names.
    controls : None, list or dict
        Control channels added to every measurement. Either a flat list, a
        ``{year: list}`` mapping or a ``{year: {mass: {key: channel}}}``
        mapping. ``None`` means no controls.
    silence : bool
        Forwarded to ``histfactory.make_workspace`` and
        ``histfactory.write_measurement``.

    For every (year, mass) one workspace is written per category plus one
    ``combination`` over all categories. When more than one year is present a
    ``full_combination`` over all years is written for every mass of the
    first year. Nothing is written for the cross-year part when the input
    holds zero or one year.
    """
    log.info("writing workspaces ...")
    if controls is None:
        controls = []
    if not os.path.exists(path):
        mkdir_p(path)
    for year, mass_category_channel in year_mass_category_channel.items():
        # write workspaces for each year
        for mass, category_channel in mass_category_channel.items():
            mass_controls = _select_controls(controls, year, mass)
            channels = []
            # make workspace for each category
            # include the control region in each
            for category, channel in category_channel.items():
                name = "{0}_{1}_{2}_{3}".format(prefix, year % 1000, category,
                                                mass)
                _write_workspace(path, name, [channel] + mass_controls,
                                 silence)
                channels.append(channel)
            # make combined workspace
            name = "{0}_{1}_combination_{2}".format(prefix, year % 1000, mass)
            _write_workspace(path, name, channels + mass_controls, silence)

    # write combined workspaces over all years
    years = list(year_mass_category_channel.keys())
    if len(years) <= 1:
        return
    masses = list(year_mass_category_channel[years[0]].keys())
    for mass in masses:
        if isinstance(controls, dict):
            # gather the controls of every year explicitly instead of relying
            # on a loop variable left over from an earlier loop
            mass_controls = [
                control for yr in years
                for control in _select_controls(controls, yr, mass)
            ]
        else:
            mass_controls = list(controls)
        # TODO: per-category workspaces across years are not written;
        # categories might be different across years
        channels = [
            chan for yr in years
            for chan in year_mass_category_channel[yr][mass].values()
        ]
        # make combined workspace
        name = "{0}_full_combination_{1}".format(prefix, mass)
        _write_workspace(path, name, channels + mass_controls, silence)
Exemple #10
0
def plots_dir(script):
    """Return the plot directory for *script* after calling ``mkdir_p`` on it.

    The directory is ``PLOTS_DIR`` joined with the base name of *script*
    stripped of its extension.
    """
    base_name = os.path.basename(script)
    script_stem = os.path.splitext(base_name)[0]
    plot_path = os.path.join(PLOTS_DIR, script_stem)
    mkdir_p(plot_path)
    return plot_path