Example #1
0
    def __init__(self, run_conf):
        """Create the models, load the data and prepare the run bookkeeping.

        Args:
            run_conf: run configuration object. It must provide a ``loc``
                attribute and a ``get`` method, which is called here both as
                ``get(section)`` and as ``get(section, option)``.
        """

        # configurations
        self.loc = run_conf.loc
        self.run_conf = run_conf
        self.opt_conf = run_conf.get('Optimiser')
        self.trn_conf = run_conf.get('Training')
        self.bcm_conf = run_conf.get('Benchmark')
        self.plt_conf = run_conf.get('Plotting')

        # models: each one is created under the name '<type>_<njet>_<n_rmd>'
        clf_type = run_conf.get('Classifier', 'type')
        adv_type = run_conf.get('Adversary', 'type')
        njet = self.trn_conf['njet']
        n_rmd = self.trn_conf['n_rmd']
        clf_name = '{}_{}_{}'.format(clf_type, njet, n_rmd)
        adv_name = '{}_{}_{}'.format(adv_type, njet, n_rmd)
        self.clf = models.Classifier.create(clf_name, run_conf.get('Classifier'))
        self.adv = models.Adversary.create(adv_name, run_conf.get('Adversary'))

        print('------------')
        print('--- Settings:')
        print(self.clf)
        print(self.adv)
        print('Optimisation', self.opt_conf)
        print('Training', self.trn_conf)
        print('Benchmark', self.bcm_conf)
        print('Plotting', self.plt_conf)
        print('------------')

        # load the data handler and the data
        # (every attribute below is set before _load_data() is called)
        self.njet = njet
        self.dh = None
        self.bcm_features = dataset.feature_list(self.trn_conf['bcm_features'], self.njet)
        self.clf_features = dataset.feature_list(self.trn_conf['clf_features'], self.njet)
        self._ds = {}
        self._tt = ['train', 'test']
        self._tts = ['train', 'test', 'ss']
        self._load_data()

        # prepare losses: one empty history per loss name, for train and test
        loss_names = ['C', 'A', 'CA', 'BCM']
        self._losses = {tt: {name: [] for name in loss_names} for tt in self._tt}

        # prepare metrics: one empty history per metric, for train and test
        self.metrics = ['roc_auc', 'sig_eff', 'ks_metric']
        self._metric_vals = {met: {tt: [] for tt in self._tt} for met in self.metrics}

        # prepare classifier scores
        self.bcm = None
        self.bcm_score = {}
        self.clf_score = {}
        # the score directory name is fixed: it does not depend on the classifier
        self.score_loc = utils.makedir(os.path.join(self.loc, 'clf_scores'))
Example #2
0
    def make_new_runconf_dir(self):
        """Create the first unused ``run<NNNN>`` directory under ``<loc>/points``.

        Candidate names are tried in order (``run0000``, ``run0001``, ...)
        and the first path that does not exist yet is handed to
        ``utils.makedir``.

        Returns:
            Whatever ``utils.makedir`` returns for the chosen path.
        """
        # lazily enumerate the candidate run directories
        candidates = (
            os.path.join(self.loc, 'points', 'run{:04d}'.format(index))
            for index in itertools.count()
        )

        # take the first candidate that is not on disk yet
        free_path = next(path for path in candidates if not os.path.exists(path))
        return utils.makedir(free_path)
Example #3
0
def main():
    """Run or submit the points of a sweep.

    The sweep directory is ``$RUN/<sweep>`` and its configuration is read
    from ``sweep_conf.ini`` inside it. For every run configuration of the
    sweep a ``run_point.py`` command is built. With ``--batch`` the command
    is written to a job script next to the run configuration and submitted;
    without it the command is executed locally and the loop stops after the
    first point.
    """
    # command line interface
    cli = argparse.ArgumentParser(description='ArgParser')
    cli.add_argument('-s', '--sweep', default='testsweep',
                     help='Name of the sweep.')
    cli.add_argument('--batch', default=False, action='store_true')
    args = cli.parse_args()

    # sweep directory and its configuration
    sweep_dir = utils.makedir(os.path.join(os.getenv('RUN'), args.sweep))
    conf_file = os.path.join(sweep_dir, 'sweep_conf.ini')
    sweep_conf = configuration.Configuration(conf_file)

    print('--- Running {}'.format(args.sweep))
    print(sweep_conf)

    # one command per run configuration
    for run_conf in sweep_conf:

        print()
        print('--- Running point')
        print()

        script = os.path.join(os.getenv('SRC'), 'scripts', 'run_point.py')
        point_conf = run_conf.path
        command = 'time python3 {} -p {}'.format(script, point_conf)

        # local mode: execute the first point only
        if not args.batch:
            os.system(command)
            break

        # batch mode: the job script is named after the directory that holds
        # the run configuration and is written into that same directory
        run_name = point_conf.split('/')[-2]
        point_dir = os.path.split(run_conf.path)[0]
        job_script = os.path.join(point_dir, '{}.sh'.format(run_name))
        print(job_script)
        utils.write_job(command, job_script)
        utils.send_job(job_script)
Example #4
0
def main():
    """Make the summary plots of a sweep.

    The results of the sweep are read with ``configuration.read_results``
    and the plots are written to ``$RUN/<sweep>/plots``: one
    metric-vs-parameter plot for every (metric, option) combination and,
    for every option, three 2D plots of fixed metric pairs.
    """
    # command line interface
    cli = argparse.ArgumentParser(description='ArgParser')
    cli.add_argument('-s', '--sweep', default='testsweep',
                     help='Name of the sweep.')
    sweep = cli.parse_args().sweep

    print('--- Summarising {}'.format(sweep))

    # results of the sweep, with the scanned options and the metrics
    results, options, metrics = configuration.read_results(sweep)

    # output directory for the plots
    plot_dir = utils.makedir(os.path.join(os.getenv('RUN'), sweep, 'plots'))

    # every metric against every option
    for metric, option in itertools.product(metrics, options):
        plot.metric_vs_parameter(metric, option, results, plot_dir)

    # metric pairs, drawn for every option in this order
    metric_pairs = [
        ('sig_eff', 'roc_auc'),
        ('ks_metric', 'roc_auc'),
        ('ks_metric', 'sig_eff'),
    ]
    for option in options:
        for first, second in metric_pairs:
            plot.metric2d(first, second, option, results, plot_dir)
Example #5
0
 def loc(name):
     """Return the directory to use for ``name``.

     ``self`` and ``is_final_step`` come from the enclosing scope. When
     ``is_final_step`` is true, ``self.loc`` itself is returned; otherwise
     the sub-directory ``<self.loc>/<name>`` is passed to ``utils.makedir``
     and its result is returned.
     """
     if is_final_step:
         return self.loc
     return utils.makedir(os.path.join(self.loc, name))
Example #6
0
def main():
    """Run the dataset production chain.

    The steps listed in ``do_steps`` are executed in order:

    1. run the ``selection.cxx`` ROOT macro on every input file,
    2. ``hadd`` the step-1 files of each dataset into a single file,
    3. run the ``njet_split.cxx`` ROOT macro on each hadded file,
    4. shuffle the entries of every tree and write the final files.

    The output goes to ``$DATA/<YYYYMMDD>_<prod_name>``. Every step reads
    the directory written by the previous one. A step that is left out of
    ``do_steps`` is not re-run: the following step then reads from the
    directory where that step's output is expected, so the output must
    already be there.
    """

    #####################
    # Configurables
    #####################

    prod_name = 'default'
    prod_name = '{}_{}'.format(datetime.today().strftime('%Y%m%d'), prod_name)

    # other settings
    do_steps = [1, 2, 3, 4]
    step1 = 'step1'
    step2 = 'step2'
    step3 = 'step3'
    out = 'out'

    #####################
    # Prepare directories and files
    #####################

    loc = {}
    loc[defs.sig] = utils.makedir(
        '/data/atlassmallfiles/users/zgubic/hmumu/v17/hadd')
    loc[defs.data] = utils.makedir(
        '/data/atlassmallfiles/users/zgubic/hmumu/v17/hadd')
    loc[defs.ss] = utils.makedir(
        '/data/atlassmallfiles/users/zgubic/hmumu/spurious_signal')

    loc[out] = utils.makedir(os.path.join(os.getenv('DATA'), prod_name))

    # get the file names
    fnames = {}
    fnames[defs.sig] = [
        'mc16a.345097.root',
        'mc16d.345097.root',
        'mc16a.345106.root',
        'mc16d.345106.root',
    ]
    fnames[defs.data] = [
        'data15.allYear.sideband.root',
        'data16.allYear.sideband.root',
        'data17.allYear.sideband.root',
    ]
    fnames[defs.ss] = os.listdir(loc[defs.ss])

    # NOTE(review): the exit status of the os.system calls below is not
    # checked, so a failing command does not stop the following steps.

    #####################
    # Step 1: Mass cut + selection
    #####################

    if 1 in do_steps:
        loc[step1] = utils.makedir(os.path.join(loc[out], step1))
        for dataset in defs.datasets:

            # the selection flags depend on the dataset only
            full_selection = 1 if dataset in [defs.sig, defs.data] else 0
            is_signal = 1 if dataset in [defs.sig] else 0

            for fname in fnames[dataset]:

                # input file
                in_file = os.path.join(loc[dataset], fname)

                # output file
                out_file = os.path.join(loc[step1], fname)

                # run the selection
                command = "root -l -q 'selection.cxx(\"{i}\", \"{o}\", {sel}, {sig})'".format(
                    i=in_file, o=out_file, sel=full_selection, sig=is_signal)
                os.system(command)

    #####################
    # Step 2: Hadd
    #####################

    if 2 in do_steps:
        # if step 1 was skipped, read from where its output is expected
        loc.setdefault(step1, os.path.join(loc[out], step1))
        loc[step2] = utils.makedir(os.path.join(loc[out], step2))
        for dataset in defs.datasets:

            # collect input files
            in_files = [
                os.path.join(loc[step1], fname) for fname in fnames[dataset]
            ]

            # hadded file
            out_file = os.path.join(loc[step2], dataset) + '.root'

            # hadd them
            command = 'hadd -f {} {}'.format(out_file, ' '.join(in_files))
            os.system(command)

    #####################
    # Step 3: Split in jet categories
    #####################

    if 3 in do_steps:
        # if step 2 was skipped, read from where its output is expected
        loc.setdefault(step2, os.path.join(loc[out], step2))
        loc[step3] = utils.makedir(os.path.join(loc[out], step3))
        for dataset in defs.datasets:

            # hadded file
            hadd_file = os.path.join(loc[step2], '{}.root'.format(dataset))

            # split in 0, 1, 2+ jet datasets (separate trees inside)
            split_file = os.path.join(loc[step3], '{}.root'.format(dataset))

            # run the macro to make new trees
            command = "root -l -q 'njet_split.cxx(\"{i}\", \"{o}\")'".format(
                i=hadd_file, o=split_file)
            os.system(command)

    #####################
    # Step 4: Shuffle the entries
    #####################

    if 4 in do_steps:
        # if step 3 was skipped, read from where its output is expected
        loc.setdefault(step3, os.path.join(loc[out], step3))
        for dataset in defs.datasets:

            # input and output files are the same for all trees of a dataset
            in_file = os.path.join(loc[step3], '{}.root'.format(dataset))
            out_file = os.path.join(loc[out], '{}.root'.format(dataset))

            for tree in defs.channels:

                # read the dataframe
                df = rpd.read_root(in_file, key=tree)

                # shuffle the tree
                df = df.sample(frac=1).reset_index(drop=True)

                # and write it: the first tree opens the file with mode 'w',
                # the following ones use mode 'a'
                mode = 'w' if tree == defs.channels[0] else 'a'
                df.to_root(out_file, key=tree, mode=mode)