Python make_flat_ntuple Examples

Programming Language: Python

Namespace/Package Name: jetnet.pyprep

Method/Function: make_flat_ntuple

Examples at hotexamples.com: 2

Python make_flat_ntuple - 2 examples found. These are the top-rated real-world Python examples of jetnet.pyprep.make_flat_ntuple, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: jetnet-make-flat-ntuple.py Project: dguest/JetFitter-Training

def make_flat_ntuple(
    input_files, 
    pt_divisions, 
    weight_file = '', 
    jet_collection = 'BTag_AntiKt4TopoEMJetsReTagged', 
    jet_tagger = 'JetFitterCharm', 
    output_path = None, 
    rds_path = 'reduced_dataset.root', 
    observer_discriminators = _default_observers, 
    do_test = False, 
    skim_function = pyprep.make_flat_ntuple, 
    ): 
    """
    Build the flat (reduced) ntuple at rds_path, creating the weight
    file first when one is requested and does not exist yet.

    Arguments: 
    input_files -- forwarded to rds.get_allowed_rds_variables, to
        pyprep.make_flat_ntuple and to skim_function
    pt_divisions -- forwarded unchanged to skim_function
    weight_file -- path of the weight file; when non-empty and missing
        it is produced with cxxprofile.pro2d before the skim runs
    jet_collection, jet_tagger -- joined with '_' to form the directory 
        name used to look up the allowed variables; also forwarded to 
        the skimmer
    output_path -- accepted for interface compatibility; not used here
    rds_path -- output file of the skim; its directory is created when
        it is missing
    observer_discriminators -- forwarded unchanged to skim_function
    do_test -- forwarded to skim_function as its 'debug' argument
    skim_function -- callable that writes the final ntuple

    Raises IOError if rds_path already exists.  Note that this check 
    happens after the weight file step, so a missing weight file is 
    still built from an existing rds_path before the error is raised.
    """

    double_variables, int_variables = rds.get_allowed_rds_variables(
        input_files = input_files, 
        full_dir_name = '_'.join([jet_collection,jet_tagger]))

    # --- make sure the output directory exists 
    # Done before the weights step because the helper "small" ntuple is 
    # written next to rds_path.  makedirs also handles nested paths. 
    rds_dir, rds_name = os.path.split(rds_path)
    if rds_dir and not os.path.isdir(rds_dir): 
        os.makedirs(rds_dir)

    # --- make weights if a name is given 
    if weight_file and not os.path.isfile(weight_file): 

        # reuse the full ntuple if it exists, otherwise build a light one
        if os.path.isfile(rds_path): 
            small_rds_path = rds_path 

        else: 
            print('making flat ntuple to build weight file')

            # splitext keeps the stem intact for names without an 
            # extension (split('.') would reduce them to '_small.root')
            small_rds = os.path.splitext(rds_name)[0] + '_small.root'
            small_rds_path = os.path.join(rds_dir,small_rds)
            if not os.path.isfile(small_rds_path): 
                pyprep.make_flat_ntuple(
                    input_files = input_files, 
                    jet_collection = jet_collection, 
                    jet_tagger = jet_tagger, 
                    output_file = small_rds_path)

        from jetnet import cxxprofile
        cxxprofile.pro2d(
            in_file = small_rds_path, 
            tree = 'SVTree', 
            plots = [( ('JetPt', 30,15.0,200),
                       ('JetEta',10,-2.5,2.5) )], 
            tags = ['bottom','charm','light'], 
            out_file = weight_file, 
            show_progress = True)

    # --- rds part
    if os.path.isfile(rds_path): 
        raise IOError(
            "{} already exists, refusing to overwrite".format(rds_path) )

    skim_function(
        input_files = input_files, 
        weight_file = weight_file, 
        double_variables = double_variables, 
        int_variables = int_variables, 
        observer_discriminators = observer_discriminators, 
        pt_divisions = pt_divisions, 
        jet_collection = jet_collection, 
        jet_tagger = jet_tagger, 
        output_file = rds_path, 
        debug = do_test, 
        )

Example #2

Show file

File: jetnet-train-then-test.py Project: dguest/JetFitter-Training

def train_and_test(input_files, 
                   config_file, 
                   working_dir = None, 
                   do_test = False, 
                   ): 
    """
    Run preprocessing and the RDSProcess step described by config_file, 
    then copy the outputs into a 'summary' directory.

    Arguments: 
    input_files -- forwarded to pyprep.make_flat_ntuple and to
        rds.get_allowed_rds_variables
    config_file -- path of the configuration file.  Read sections: 
        [preprocessing]: jet_collection, jet_tagger, pt_divisions, 
            observer_discriminators (whitespace separated lists)
        [training]: variables, bottom_wt, charm_wt, light_wt
        [testing]: testing_dataset (optional)
        [weights]: deprecated; when present its entries are moved to 
            [training] as '<name>_wt' and config_file is rewritten
    working_dir -- directory for all outputs; defaults to the jet 
        collection name, or to the jet tagger name for array jobs
    do_test -- forwarded to process.RDSProcess; also selects the 
        'flags' value passed to pyprep.make_flat_ntuple

    Raises: 
    IOError -- a testing dataset was configured but does not exist
    KeyError -- jet_tagger contains 'ARRAYID' but the PBS_ARRAYID 
        environment variable is not set
    """

    config = SafeConfigParser()
    config.read(config_file)

    # --- setup preprocessing
    preproc = dict(config.items('preprocessing'))
    jet_collection = preproc['jet_collection']

    pt_divisions = [float(x) for x in preproc['pt_divisions'].split() ]
    observer_discriminators = preproc['observer_discriminators'].split()

    # --- early load of post-training options  
    training_opts = dict(config.items('training'))
    testing_opts = dict(config.items('testing'))
    training_variables = training_opts['variables'].split()

    # None when no dedicated testing dataset is configured
    testing_dataset = testing_opts.get('testing_dataset')

    # --- change some things if this is an array job
    jet_tagger = preproc['jet_tagger']
    if 'ARRAYID' in jet_tagger: 
        the_array_id = os.environ['PBS_ARRAYID'].rjust(2,'0')
        jet_tagger = jet_tagger.replace('ARRAYID',the_array_id)
        working_dir = jet_tagger
        if testing_dataset: 
            testing_dataset = os.path.join(working_dir,testing_dataset)

    if testing_dataset and not os.path.isfile(testing_dataset): 
        raise IOError('{} not found'.format(testing_dataset))

    # --- migrate the deprecated [weights] section 
    if config.has_section('weights'): 
        warn('moving [weights] contents into [training] section', 
             FutureWarning)
        for wt_name, wt in config.items('weights'): 
            config.set('training', wt_name + '_wt', wt)
        config.remove_section('weights')
        # write the migrated configuration back to the file it was 
        # read from
        with open(config_file,'w') as new_cfg: 
            config.write(new_cfg)

    flavors = ['bottom','charm','light']
    flavor_weights = { 
        f : float(config.get('training', f + '_wt')) for f in flavors
        }

    # --- setup the working directory 
    if not working_dir: 
        working_dir = jet_collection
    if not os.path.isdir(working_dir): 
        # makedirs so that a nested working_dir can be created 
        os.makedirs(working_dir)

    # --- hold here if someone else is working 
    # NOTE(review): the hold set here is only released after the 
    # dataset step succeeds; an exception in between leaves it set. 
    # Confirm against hold_job / set_hold whether that is intended.
    hold_job(working_dir)
    set_hold(working_dir)

    # --- rds part
    rds_name = 'reduced_dataset.root'
    rds_dir = os.path.join(working_dir, 'reduced')
    if not os.path.isdir(rds_dir): 
        os.mkdir(rds_dir)

    rds_path = os.path.join(rds_dir, rds_name )
    if not testing_dataset: 
        testing_dataset = rds_path

    # get weights file 
    weight_file = os.path.join(rds_dir, 'weights.root')
    if not os.path.isfile(weight_file): 

        # reuse the full ntuple if it exists, otherwise build a light one
        if os.path.isfile(rds_path): 
            small_rds_path = rds_path 

        else: 
            print('--- making flat ntuple to build weight file ---')
            small_rds = 'small_rds.root'
            small_rds_path = os.path.join(rds_dir,small_rds)
            if not os.path.isfile(small_rds_path): 
                pyprep.make_flat_ntuple(
                    input_files = input_files, 
                    jet_collection = jet_collection, 
                    jet_tagger = jet_tagger, 
                    output_file = small_rds_path)

        # 11 bin edges, evenly spaced in log(pt) between pt_low and 
        # pt_high
        pt_low, pt_high = (15.0, 300)
        log_span = log(pt_high) - log(pt_low)
        log_range = [log(pt_low) + i * log_span / 10 for i in range(11)]
        pt_bins = [exp(x) for x in log_range]

        print('--- making weight file ---')
        from jetnet import cxxprofile
        cxxprofile.pro2d(
            in_file = small_rds_path, 
            tree = 'SVTree', 
            plots = [( ('JetPt', pt_bins),
                       ('JetEta',10,-2.5,2.5) )], 
            tags = ['bottom','charm','light'], 
            out_file = weight_file, 
            show_progress = True)

    double_variables, int_variables = rds.get_allowed_rds_variables(
        input_files = input_files, 
        full_dir_name = jet_collection + '_' + jet_tagger)

    if not os.path.isfile(rds_path): 
        print('--- making flattened dataset for training ---')
        flags = 'hr' if not do_test else 'd'
        pyprep.make_flat_ntuple(
            input_files = input_files, 
            weight_file = weight_file, 
            double_variables = double_variables, 
            int_variables = int_variables, 
            observer_discriminators = observer_discriminators, 
            pt_divisions = pt_divisions, 
            jet_collection = jet_collection, 
            jet_tagger = jet_tagger, 
            output_file = rds_path, 
            flags = flags, 
            )

    # --- unset other job hold 
    set_hold(working_dir, value = False)

    proc = process.RDSProcess(
        reduced_dataset = rds_path, 
        working_dir = working_dir, 
        training_variables = training_variables, 
        flavor_weights = flavor_weights, 
        testing_dataset = testing_dataset, 
        do_test = do_test, 
        config_file = config_file)
    proc.start()
    proc.join()
    proc_outputs = proc.out_queue.get(block = False)

    # --- make the summary folder 
    # The summary directory sits next to the working directory.  
    # dirname keeps the leading '/' of an absolute working_dir and 
    # gives '' (so just 'summary') when there is no parent component.
    summary_dir = os.path.join(os.path.dirname(working_dir), 'summary')

    if not os.path.isdir(summary_dir): 
        os.mkdir(summary_dir)

    # use only the file name of the config so that the copies always 
    # land inside summary_dir, whatever directory the config lives in
    summary_base_name, cfg_ext = os.path.splitext(
        os.path.basename(config_file))
    if 'PBS_ARRAYID' in os.environ: 
        summary_base_name += '_subjob{}'.format(os.environ['PBS_ARRAYID'])

    if 'profile' in proc_outputs: 
        profile_summary_name = summary_base_name + '_profile.root'
        profile_summary_path = os.path.join(summary_dir,profile_summary_name)
        shutil.copyfile(proc_outputs['profile'], profile_summary_path)

    this_config_name = summary_base_name + cfg_ext
    this_config_path = os.path.join(summary_dir, this_config_name)
    shutil.copyfile(config_file, this_config_path)