Example #1
def unpack_remove_tars():
    """Script to unpack .tars holding directories with pickled events.

    Uses multiprocessing to unpack tars with bash:
    > tar -xf <tar_location> -C <pickle_dir_location>.

    For now, the source (currently tarball_dir) and destination (pickle_dir) have to be hardcoded in the script.
    """
    # * Where are tars located?
    tarball_dir = get_project_root() + '/data/oscnext-genie-level5-v01-01-pass2/tarballs'
    tarballs = [path for path in Path(tarball_dir).iterdir()]

    # * Where should they be put?
    pickle_dir = get_project_root() + '/data/oscnext-genie-level5-v01-01-pass2/pickles/'
    if not Path(pickle_dir).exists():
        Path(pickle_dir).mkdir()

    # * Multiprocess
    available_cores = cpu_count()
    pickle_dir_list = [pickle_dir] * len(tarballs)
    packed = [entry for entry in zip(tarballs, pickle_dir_list)]

    with Pool(available_cores + 2) as p:
        p.map(unpack_tar_remove, packed)
    print(get_time(), 'Finished unpacking tarballs!')
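The unpack_tar_remove worker mapped over packed is not included in this example. A minimal sketch, assuming it simply shells out to the tar command quoted in the docstring and then deletes the tarball (as the name suggests), could look like this:

import subprocess
from pathlib import Path

def unpack_tar_remove(pack):
    # * Hypothetical worker: unpack one tarball into the pickle directory, then delete the tarball
    tar_location, pickle_dir_location = pack
    subprocess.run(['tar', '-xf', str(tar_location), '-C', str(pickle_dir_location)], check=True)
    Path(tar_location).unlink()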
Example #2
def make_mask(data_path, mask_name='any', min_doms=0, max_doms=np.inf):
    # * make mask directory if it doesn't exist
    data_path = hf.get_project_root() + hf.get_path_from_root(data_path)
    name = hf.get_dataset_name(data_path)
    
    dir_path = hf.get_project_root() + '/data/masks/'+name
    if not Path(dir_path).is_dir():
        Path(dir_path).mkdir(parents=True)

    if mask_name == 'dom_interval':
        make_dom_interval_mask(data_path, dir_path, min_doms, max_doms)

    # * Make a .dvc-file to track mask
    dvc_path = hf.get_project_root() + '/data'
    subprocess.run(['dvc', 'add', 'masks'], cwd=dvc_path)
Example #3
def move_tars():
    """Scripts used to move tarballs of rpickled data from HEP to gpulab.

    Script must be run on gpulab - one cannot ssh from HEP to gpulab, only the other way around.

    Uses rsync to move tarballs. Source, destination and number of tarballs must be hardcoded for now.
    """

    # * Setup - where to load data, how many events
    n_pickle_dirs = 1131
    data_dir = get_project_root() + '/data/oscnext-genie-level5-v01-01-pass2/'
    if not Path(data_dir).exists():
        Path(data_dir).mkdir()
        print(get_time(), 'Created directory %s' % (data_dir))
    from_ = '[email protected]:/groups/hep/bjoernhm/CubeML/data/oscnext-genie-level5-v01-01-pass2/tarballs/'
    to_ = data_dir + 'tarballs/'
    if not Path(to_).exists():
        Path(to_).mkdir()
        print(get_time(), 'Created directory %s' % (to_))

    from_tarballs = [from_ + str(i) + '.tar' for i in range(n_pickle_dirs)]
    to_list = [to_ + str(i) + '.tar' for i in range(n_pickle_dirs)]

    # * Zip and multiprocess
    packed = [entry for entry in zip(from_tarballs, to_list)]
    with Pool() as p:
        p.map(move_tar, packed)

    print(get_time(), 'Finished copying tarballs!')
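The move_tar worker is not shown here. A minimal sketch, assuming it just wraps the rsync call mentioned in the docstring for a single (source, destination) pair, could be:

import subprocess

def move_tar(pack):
    # * Hypothetical worker: copy one tarball from the remote source to the local destination
    from_tarball, to_tarball = pack
    subprocess.run(['rsync', '-a', from_tarball, to_tarball], check=True)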
Example #4
def find_nearest_doms(data_dir_path=get_project_root()+'/data/oscnext-genie-level5-v01-01-pass2/',
                      multiprocess=True,
                      d_name='dom_geom.pickle'):
    
    # * Load precalculated geometry dictionary
    d_geom = pickle.load(open(data_dir_path+d_name, 'rb'))

    # * For each entry, calculate distances to all other DOMs
    # * Extract coordinates and pair with ID
    dom_ids = [dom_id for dom_id in d_geom]
    coords = {key: items['coordinates'] for key, items in d_geom.items()}
    own_coords = [items['coordinates'] for key, items in d_geom.items()]
    
    print(get_time(), 'Calculation of nearest DOMs begun...')
    if multiprocess:
        # * prepare for multiprocessing - we loop over DOM IDs
        coords_list = [coords]*len(dom_ids)
        packed = [pack for pack in zip(dom_ids, own_coords, coords_list)]

        with Pool() as p:
            dicts = p.map(find_nearest_doms_multi, packed)
    else:
        raise ValueError('Only multiprocessing implemented!')
    print(get_time(), 'Calculation finished!')
    
    # * Update the geometry dictionary with the closest DOMs
    for dom_id, d in zip(dom_ids, dicts):
        d_geom[dom_id].update(d)
    
    return d_geom
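The find_nearest_doms_multi worker is not part of this example. A minimal sketch, assuming plain Euclidean distances and the 'closest' key that appears in Example #14 below, could be:

import numpy as np

def find_nearest_doms_multi(pack):
    # * Hypothetical worker: one DOM ID, its own coordinates and the coordinates of all DOMs
    dom_id, own_coords, coords = pack

    # * Euclidean distance from this DOM to every DOM (including itself)
    dists = {other_id: float(np.linalg.norm(np.asarray(own_coords) - np.asarray(other)))
             for other_id, other in coords.items()}

    # * Sort by distance and drop the first entry, since that is the DOM itself
    return {'closest': [key for key, value in sorted(dists.items(), key=lambda kv: kv[1])][1:]}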
Example #5
def make_geom_dict(data_dir_path=get_project_root()+'/data/oscnext-genie-level5-v01-01-pass2/',
                   multiprocess=True,
                   d_name='dom_geom.pickle'):
    
    print(get_time(), 'Making geometry dictionary...')
    shelve_path = data_dir_path+'shelve/oscnext-genie-level5-v01-01-pass2'
    
    # * Get filenames
    with shelve.open(shelve_path) as f:
        filenames = [key for key in f]

    # * Prepare for multiprocessing
    path_list = [shelve_path]*len(filenames)
    packed = [entry for entry in zip(filenames, path_list)]
    
    # * Multiprocess
    if multiprocess:
        with Pool() as p:
            all_dicts = p.map(find_unique_ids, packed)

        # * Combine dictionaries
        print(get_time(), 'Combining dictionaries...')
        dom_geom_dict = {}
        for d in all_dicts:
            dom_geom_dict.update(d)
        print(get_time(), 'Dictionaries combined!')
        
    else:
        dom_geom_dict = {}
        for pack in packed:
            dom_geom_dict.update(find_unique_ids(pack))
    
    return dom_geom_dict
Example #6
def fit_feature_transformers(pack):
    # * Unpack
    key, d, clip_dict, file_list, \
    n_wanted_sample, n_wanted_histogram, particle_code, transformer = pack

    # * Read some data
    all_data = []
    for file in file_list:
        # * once enough data has been read, break out
        if len(all_data) > n_wanted_sample:
            break
        data = hf.read_h5_dataset(file, key, prefix='raw/')
        if data[0].shape:
            for entry in data:
                all_data.extend(entry)
        else:
            all_data.extend(data)

    # * Data read. Now draw a random sample
    indices = np.array(range(len(all_data)))
    random.shuffle(indices)
    random_subsample = sorted(
        indices[:min(len(indices), int(n_wanted_histogram))])

    # * Draw histogram and save it.
    plot_data = np.array(sorted(np.array(all_data)[random_subsample]))
    plot_data_unclipped = np.array(sorted(
        np.array(all_data)[random_subsample]))
    if clip_dict:
        minimum = clip_dict['min']
        maximum = clip_dict['max']
        plot_data = np.clip(plot_data, minimum, maximum)
    d['data'] = [plot_data]
    d['title'] = key + '- Entries = %.1e' % (plot_data_unclipped.shape[0])

    path = hf.get_project_root() + '/reports/plots/features/'
    d['savefig'] = path + particle_code + '_' + key + '.png'
    fig = rpt.make_plot(d)

    # * Fit a transformer/scaler
    transformer.fit(plot_data_unclipped.reshape(-1, 1))

    # * Transform plot data
    plot_data_transformed = transformer.transform(
        plot_data_unclipped.reshape(-1, 1))

    # * Plot and save
    d_transformed = {'data': [plot_data_transformed]}
    d_transformed['title'] = key + ' transformed - Entries = %.1e' % (
        plot_data_unclipped.shape[0])
    d_transformed[
        'savefig'] = path + particle_code + '_transformed_' + key + '.png'
    fig = rpt.make_plot(d_transformed)

    d_transformer = {key: transformer}

    return d_transformer
Example #7
def make_tars():
    """Script to pack pickle-directories with single events into .tars 

    Must hardcode where pickles are located and where tars should be put.
    """
    # * Setup - where to load data, how many events
    data_dir = get_project_root() + '/data/oscnext-genie-level5-v01-01-pass2/'
    from_ = data_dir + 'pickles'
    to_ = data_dir + 'tarballs'
    pickle_dirs = [path for path in Path(from_).iterdir()]

    # * Zip and multiprocess
    to_list = [to_] * len(pickle_dirs)
    packed = [entry for entry in zip(pickle_dirs, to_list)]

    available_cores = cpu_count()
    with Pool(available_cores + 2) as p:
        p.map(make_tar, packed)

    print(get_time(), 'Finished making tarballs!')
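The make_tar worker is again not shown. A minimal sketch, assuming each pickle directory is packed into a tarball named after the directory, using the same tar convention as Example #1, could be:

import subprocess
from pathlib import Path

def make_tar(pack):
    # * Hypothetical worker: pack one pickle directory into <to_>/<directory name>.tar
    pickle_dir, to_ = pack
    pickle_dir = Path(pickle_dir)
    tar_path = str(Path(to_) / (pickle_dir.name + '.tar'))
    subprocess.run(['tar', '-cf', tar_path, '-C', str(pickle_dir.parent), pickle_dir.name], check=True)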
Example #8
def make_mask(data_path, dirs, mask_name='all', min_doms=0, max_doms=np.inf, min_energy=-np.inf, max_energy=np.inf):
    # * make mask directory if it doesn't exist
    data_path = get_project_root() + get_path_from_root(data_path)

    if mask_name == 'dom_interval':
        mask_path = make_dom_interval_mask(data_path, dirs, min_doms, max_doms)

    elif mask_name == 'dom_interval_SRTInIcePulses':
        mask_path = make_dom_interval_mask(data_path, dirs, min_doms, max_doms, dom_mask='SRTInIcePulses')

    elif mask_name == 'all':
        mask_path = make_all_mask(data_path, dirs)

    elif mask_name == 'muon_neutrino':
        mask_path = make_particle_mask(data_path, dirs, mask_name)

    elif mask_name == 'energy_interval':
        mask_path = make_energy_interval_mask(data_path, dirs, min_energy, max_energy)

    else:
        raise ValueError('make_mask: unknown mask_name %s' % mask_name)

    return mask_path
Example #9
import argparse
import multiprocessing
from src.modules.main_funcs import run_experiment
from src.modules.helper_functions import get_project_root
from pathlib import Path

if __name__ == '__main__':
    description = 'Runs an experiment from the experiments folder.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--gpu', nargs='+', default='0', type=str, help='Sets the IDs of the GPUs to use')

    args = parser.parse_args()
    
    # * Fetch an experiment - run the oldest first.
    exp_dir = get_project_root() + '/experiments'
    exps = sorted(Path(exp_dir).glob('*.json'))
    
    # ! Someone online said to add the next line to ensure CUDA works...
    multiprocessing.set_start_method('spawn')
    print('WHAT THE F**K IS UP')
    # run_experiment(exps[0], gpu_id=args.gpu[0])

Example #10
                    help='Saves figure(s) in root directory',
                    action='store_true')
args = parser.parse_args()

if __name__ == '__main__':

    # * First create plot dictionaries
    plot_dicts = []
    for model in args.inputs:

        # * Locate the model directory
        paths = hf.find_files(model)
        for path in paths:
            if path.split('/')[-1] == model:
                break

        # * Make a plotting dictionary with the datasets from the different models
        plot_dicts = rprt.get_performance_plot_dicts(path, plot_dicts)

    # * Now display (or save) desired performance plots
    for i, plot_dict in enumerate(plot_dicts):
        if args.save:
            plot_dict['savefig'] = hf.get_project_root() + '/comparisons/' + plot_dict['title'] + '.png'

        try:
            fig = rprt.make_plot(plot_dict)
        except FileNotFoundError:
            Path(hf.get_project_root() + '/comparisons/').mkdir()
            fig = rprt.make_plot(plot_dict)
Example #11
def load_and_fit_transformer(pack):
    ids, (key, feature_dict), db_path, n_data = pack

    with shelve.open(db_path, 'r') as db:
        id_iter = iter(ids)
        data = np.array([])
        
        loaded = 0
        transformer = feature_dict['transformer']
        clip_d = feature_dict.get('clip', None)

        # * If we are dealing with a feature that needs to be transformed, make the transformer!
        if transformer:

            # * Extract the function needed for derived features
            fnc = feature_dict['feature_calculator']
            
            # * Loop until we have enough samples for the transformer
            while loaded < n_data:
                
                # * If we iterated over all data, that's it - just exit the loop.
                try:
                    event = db[next(id_iter)]['raw']
                except StopIteration:
                    break

                # * If dealing with a derived feature, calculate it!
                if fnc:
                    new_data = fnc(event)
                
                # * If not, just load it
                else:
                    new_data = event[key]
                
                data = np.append(data, new_data)
                if isinstance(new_data, np.ndarray):
                    loaded += new_data.shape[0]
                elif isinstance(new_data, (float, int)):
                    loaded += 1
                else:
                    raise ValueError('load_and_fit_transformer: Unknown type (%s) encountered'%(type(new_data)))

            # * Save plot of pre-transformed data
            path = get_project_root()+'/reports/shelve_data'
            if not Path(path).exists():
                Path(path).mkdir()
            plot_d = {'data': [data], 'savefig': path+'/%s.png'%key}
            _ = make_plot(plot_d)
            
            # * Now fit a transformer
            transformer.fit(data.reshape(-1, 1))
            
            # * save plot of transformed data
            if clip_d:
                data_transformed = np.clip(data, clip_d['min'], clip_d['max'])
                data_transformed = transformer.transform(data_transformed.reshape(-1, 1))
            else:
                data_transformed = transformer.transform(data.reshape(-1, 1))
            plot_d = {'data': [data_transformed], 'savefig': path+'/%s_transformed.png'%key}
            _ = make_plot(plot_d)
    
    return {key: transformer}
Example #12
    parser.add_argument('--n_transform', default=500000, type=int, help='Sets the number of datapoints used to approximate the feature distributions when fitting the transformers')
    parser.add_argument('--n_cpus', default=cpu_count(), type=int, help='Sets the number of CPUs to use')
    parser.add_argument('--path', default='None', type=str, help='Path to shelve-file.')
    parser.add_argument('--fit_transformers', action='store_true', help='Whether or not to fit new transformers.')
    parser.add_argument('--new_name', default='None', type=str, help='Sets the new databases name.')

    args = parser.parse_args()

    if args.path == 'None':
        raise KeyError(r'A path must be supplied! Use flag --path')

    if args.new_name == 'None':
        raise KeyError(r'A new name must be supplied! Use flag --new_name')

    # * Setup - where to load data, how many events
    path_db = Path(get_project_root()+'/'+get_path_from_root(args.path))
    path_geom_dict = str(path_db.parent)+'/dom_geom.pickle'
    path_transformer = str(path_db.parent)+'/transformers.pickle'
    path_new_db = str(path_db.parent)+'/'+args.new_name
    
    n_data = args.n_transform if not args.dev else 1000
    chunksize = args.chunksize if not args.dev else 1000
    n_cpus = args.n_cpus if not args.dev else 2
    feature_dicts = get_feature_dicts()
    geom_features = get_geom_features()
    
    # * Fit and save transformers
    if args.fit_transformers:
        transformers = fit_transformers(str(path_db), n_data, feature_dicts, n_cpus=n_cpus)
        with open(path_transformer, 'wb') as f:
            pickle.dump(transformers, f)
Example #13
import joblib
from pathlib import Path
from multiprocessing import Pool, cpu_count

import src.modules.helper_functions as hf
import src.modules.preprocessing as pp

if __name__ == '__main__':
    # * For every datafile, make a new datafile so the originals are not messed up
    data_dir = hf.get_project_root() + '/data/oscnext-genie-level5-v01-01-pass2_copy'
    particle_code = '140000'
    prefix = 'transform1'

    # * Load transformers, keys and prepare filenames
    transformer_path = data_dir + '/transformers/' + particle_code + '_' + prefix + '.pickle'
    transformers = joblib.load(open(transformer_path, 'rb'))
    file_list = [
        str(file) for file in Path(data_dir).iterdir() if file.suffix == '.h5'
        and hf.confirm_particle_type(particle_code, file)
    ]
    keys = pp.get_feature_keys()

    # * Pack each filepath with transformers and keys for multiprocessing
    N_FILES = len(file_list)

    transformers_list = [transformers for i_file in range(N_FILES)]
    keys_list = [keys for i_file in range(N_FILES)]
    prefix_list = [prefix for i_file in range(N_FILES)]

    packed = [
Example #14
    # * drop the first entry, since this is itself
    d = {'closest': [key for key, value in sorted(dists.items(), key=lambda kv: kv[1])][1:]}

    return d

if __name__ == '__main__':

    # * Parse arguments!
    description = 'Creates a dictionary of DOM-IDs and their positions by looping over all DOMs.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--multiprocess', action='store_true', help='Enables multiprocessing.')
    parser.add_argument('--create_geom_dict', action='store_true', help='Calculates a geometry dictionary.')
    parser.add_argument('--find_nearest', action='store_true', help='Finds the nearest DOMs for each DOM.')
    args = parser.parse_args()

    data_dir_path = get_project_root()+'/data/oscnext-genie-level5-v01-01-pass2/'
    d_name = 'dom_geom.pickle'

    # * If geometry dictionary does not exist, make it first
    if args.create_geom_dict or not Path(data_dir_path+d_name).exists():
        dom_geom_dict = make_geom_dict(data_dir_path=data_dir_path, multiprocess=args.multiprocess, d_name=d_name)
        
        # * Save geometry as a dict with DOM ID: np.array([x, y, z])
        n_doms_found = len(dom_geom_dict)
        pickle.dump(dom_geom_dict, open(data_dir_path+d_name, 'wb'))
        print(get_time(), 'Found %d DOMs in total.'%(n_doms_found))
        print(get_time(), 'Saved file at %s'%(get_path_from_root(data_dir_path+d_name)))

    # * Calculate distance to all other DOMs if it doesn't already exist
    if args.find_nearest:
        d_geom = find_nearest_doms(data_dir_path=data_dir_path, multiprocess=args.multiprocess, d_name=d_name)
Example #15
d['label'].append('Std. mean')

# * Greatest fractional difference.
# * Calculated by requiring (maxprod/minprod)**n = 10
greatest_frac_diff = (max(weights_prods)/min(weights_prods))
new_exp = np.log(10.0)/np.log(greatest_frac_diff)
gfd_10 = weights_prods**new_exp
gfd_10_normed = gfd_10/np.mean(gfd_10)
d['x'].append(x_vals)
d['y'].append(gfd_10_normed)
d['label'].append('GFD=10')


# * Make a spline
interpolator_linear = interpolate.interp1d(x_vals, gfd_10_normed, fill_value="extrapolate", kind='quadratic')
x_extrapolate = np.linspace(0.0, 3.0, 200)
gfd_10_extrapolate = interpolator_linear(x_extrapolate)
d['x'].append(x_extrapolate)
d['y'].append(gfd_10_extrapolate)
d['label'].append('GFD=10, quadratic interp.')
# # * Print values
# for count, e_sigma, gmean, prod, mean, gfd in zip(counts_weights_normed, energy_weights_normed, gmeans_normed, weights_prods_normed, weights_meaned, gfd_10_normed):
#     print('%.3f, %.3f, %.3f, %.3f, %.3f, %.3f'%(count, e_sigma, gmean, prod, mean, gfd))

d['yscale'] = 'log'
d['savefig'] = get_project_root() + '/reports/plots/energyreg_weight_propositions.png' 
d['title'] = 'Combination of entries in each range + Icecube performance'
d['xlabel'] = r'log(E) [E/GeV]'
d['ylabel'] = r'Weight value'

fig = make_plot(d)
Example #16
def generate_gms_table_converters(losses="all"):
    """Generate converters for expected values of muon length <--> muon energy based on
    the tabulated muon energy loss model [1], spline-interpolated for smooth behavior
    within the range of tabulated energies / lengths.
    Note that "gms" in the name comes from the names of the authors of the table used.
    Parameters
    ----------
    losses : comma-separated str or iterable of strs
        Valid sub-values are {"all", "ionization", "brems", "photonucl", "pair_prod"}
        where if any in the list is specified to be "all" or if all of {"ionization",
        "brems", "photonucl", and "pair_prod"} are specified, this supercedes all
        other choices and the CSDA range values from the table are used..
    Returns
    -------
    muon_energy_to_length : callable
        Call with a muon energy to return its expected length
    muon_length_to_energy : callable
        Call with a muon length to return its expected energy
    energy_bounds : tuple of 2 floats
        (lower, upper) energy limits of table; below the lower limit, lengths are
        estimated to be 0 and above the upper limit, a ValueError is raised;
        corresponding behavior is enforced for lengths passed to `muon_length_to_energy`
        as well.
    References
    ----------
    [1] D. E. Groom, N. V. Mokhov, and S. I. Striganov, Atomic Data and Nuclear Data
        Tables, Vol. 78, No. 2, July 2001, p. 312. Table II-28.
    """
    if isinstance(losses, string_types):
        losses = tuple(x.strip().lower() for x in losses.split(","))

    VALID_MECHANISMS = ("ionization", "brems", "pair_prod", "photonucl", "all")
    for mechanism in losses:
        assert mechanism in VALID_MECHANISMS

    if "all" in losses or set(losses) == set(
            m for m in VALID_MECHANISMS if m != "all"):
        losses = ("all", )

    fpath = get_project_root(
    ) + "/src/modules/retro_data/muon_stopping_power.csv"
    table = np.loadtxt(fpath, delimiter=",")

    kinetic_energy = table[:, 0]  # (GeV)
    total_energy = kinetic_energy + MUON_REST_MASS

    mev_per_gev = 1e-3
    cm_per_m = 1e2

    if "all" in losses:
        # Continuous-slowing-down-approximation (CSDA) range (cm * g / cm^3)
        csda_range = table[:, 7]
        mask = np.isfinite(csda_range)
        csda_range = csda_range[mask]
        ice_csda_range_m = csda_range / NOMINAL_ICE_DENSITY / cm_per_m  # (m)
        energy_bounds = (np.min(total_energy[mask]),
                         np.max(total_energy[mask]))
        _, muon_energy_to_length = generate_lerp(
            x=total_energy[mask],
            y=ice_csda_range_m,
            low_behavior="constant",
            high_behavior="extrapolate",
            low_val=0,
        )
        _, muon_length_to_energy = generate_lerp(
            x=ice_csda_range_m,
            y=total_energy[mask],
            low_behavior="constant",
            high_behavior="extrapolate",
            low_val=0,
        )
    else:
        from scipy.interpolate import UnivariateSpline

        # All stopping powers given in (MeV / cm * cm^3 / g)
        stopping_power_by_mechanism = dict(
            ionization=table[:, 2],
            brems=table[:, 3],
            pair_prod=table[:, 4],
            photonucl=table[:, 5],
        )

        stopping_powers = []
        mask = np.zeros(shape=table.shape[0], dtype=bool)
        for mechanism in losses:
            addl_stopping_power = stopping_power_by_mechanism[mechanism]
            mask |= np.isfinite(addl_stopping_power)
            stopping_powers.append(addl_stopping_power)
        stopping_power = np.nansum(stopping_powers, axis=0)[mask]
        stopping_power *= cm_per_m * mev_per_gev * NOMINAL_ICE_DENSITY

        valid_energies = total_energy[mask]
        energy_bounds = (valid_energies.min(), valid_energies.max())
        sample_energies = np.logspace(
            start=np.log10(valid_energies.min()),
            stop=np.log10(valid_energies.max()),
            num=1000,
        )
        spl = UnivariateSpline(x=valid_energies,
                               y=1 / stopping_power,
                               s=0,
                               k=3)
        ice_range = np.array(
            [spl.integral(valid_energies.min(), e) for e in sample_energies])
        _, muon_energy_to_length = generate_lerp(
            x=sample_energies,
            y=ice_range,
            low_behavior="constant",
            high_behavior="extrapolate",
            low_val=0,
        )
        _, muon_length_to_energy = generate_lerp(
            x=ice_range,
            y=sample_energies,
            low_behavior="constant",
            high_behavior="extrapolate",
            low_val=0,
        )

    return muon_energy_to_length, muon_length_to_energy, energy_bounds
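A short usage sketch for the converters returned above; the 10 GeV input is purely illustrative and assumes it lies inside energy_bounds:

muon_energy_to_length, muon_length_to_energy, energy_bounds = generate_gms_table_converters(losses="all")

# Expected muon length (m) at 10 GeV total energy, and the round trip back to energy (GeV)
length_m = muon_energy_to_length(10.0)
energy_gev = muon_length_to_energy(length_m)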
Example #17
# from matplotlib import pyplot as plt
# from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
# import h5py as h5
# from time import time
# from scipy.stats import norm
# import subprocess
# from multiprocessing import Pool, cpu_count

# from src.modules.classes import *
from pathlib import Path
# from src.modules.reporting import *
# from src.modules.constants import *
# import src.modules.loss_funcs as lf
import src.modules.helper_functions as hf
import subprocess
# from src.modules.eval_funcs import *
# from src.modules.preprocessing import *
# # import src.modules.preprocessing as pp
# from src.modules.main_funcs import *
# import shelve
# import sys
# from time import sleep
for path in Path(hf.get_project_root() + '/reports/thesis_plots').iterdir():
    if path.is_dir():
        if path.name == 'all_pgf':
            continue
        print(hf.get_time(), 'Running', path.name)
        runpath = str(path) + '/script.py'
        subprocess.call(['python', runpath])
        print('')
Example #18
                )
            else:
                weights, interpolator, savename = make_weights(
                    name, ids, db, debug=args.dev, interpolator=interpolator, alpha=args.alpha
                )

            # Save in DB
            ids_strings = [str(idx) for idx in ids]
            print(get_time(), 'Writing %s to database'%(savename))
            db.write('scalar', savename, ids_strings, weights)
            print(get_time(), 'Weights saved!')


        # Save a figure of the weights
        if args.make_plot:
            if name == 'uniform_direction_weights':
                x = np.linspace(-1.0, 1.0)
            else:
                x = np.linspace(0.0, 3.0)
            y = interpolator(x)
            d = {'x': [x], 'y': [y]}
            d['savefig'] = '/'.join([get_project_root(), 'reports/plots', savename+'.png'])
            d['yscale'] = 'log'
            _ = make_plot(d)
        
        if args.save_interpolator:
            path = PATH_DATA_OSCNEXT + '/weights/' + savename + '.pickle'
            with open(path, 'wb') as f:
                pickle.dump(interpolator, f)

Example #19
        weights, interpolator = inverse_performance_muon_energy(
            masks,
            dataset_path,
            from_frac=from_frac,
            to_frac=to_frac,
            debug=debug)

    return weights, interpolator


if __name__ == '__main__':

    # ! Can use 2*n_cpus - only ~45 % of processors are used
    # ! Update: Seems like with 2*n_cpus, ~45 % of processors are also used.
    # * Choose dataset, masks and size of subset to calculate weights from
    dataset_path = get_project_root(
    ) + '/data/oscnext-genie-level5-v01-01-pass2'
    masks = ['muon_neutrino']
    names = args.name
    if not names:
        raise KeyError('Names must be supplied!')

    # * Ensure weight directory exists
    weights_dir = dataset_path + '/weights/'
    if not Path(weights_dir).exists():
        Path(weights_dir).mkdir()

    # * from and to are used for spline calculation
    from_frac, to_frac = args.from_frac, args.to_frac
    if args.dev:
        from_frac, to_frac = 0.8, 0.81
        PRINT_EVERY = 100