Beispiel #1
0
def fit_feature_transformers(pack):
    """Fit a transformer on a random sample of one feature and plot it.

    Values of ``key`` are read file by file until more than
    ``n_wanted_sample`` values are collected. A random subsample is drawn,
    plotted before and after the transformation, and used to fit
    ``transformer``.

    Args:
        pack: Tuple ``(key, d, clip_dict, file_list, n_wanted_sample,
            n_wanted_histogram, particle_code, transformer)``.
            ``d`` is a plot dictionary that is updated in place with the
            'data', 'title' and 'savefig' entries. ``clip_dict`` (optional,
            may be falsy) holds 'min' and 'max' and only clips the data of
            the first plot; the transformer is fitted on unclipped values.

    Returns:
        dict: ``{key: transformer}`` with the fitted transformer.
    """
    # * Unpack
    (key, d, clip_dict, file_list, n_wanted_sample, n_wanted_histogram,
     particle_code, transformer) = pack

    # * Read some data
    all_data = []
    for file in file_list:
        # * once enough data has been read, break out
        if len(all_data) > n_wanted_sample:
            break
        data = hf.read_h5_dataset(file, key, prefix='raw/')

        # * An empty dataset has no first entry to inspect - skip it
        if len(data) == 0:
            continue

        # * Entries with a non-empty shape are sequences: flatten them
        if data[0].shape:
            for entry in data:
                all_data.extend(entry)
        else:
            all_data.extend(data)

    # * Data read. Now draw a random sample. Taking the minimum before the
    # * int conversion keeps an infinite n_wanted_histogram usable.
    indices = np.arange(len(all_data))
    random.shuffle(indices)
    n_histogram = int(min(len(indices), n_wanted_histogram))

    # * Convert and sort the sample once; both plots and the fit share it
    sample_unclipped = np.sort(np.asarray(all_data)[indices[:n_histogram]])

    # * Draw histogram and save it.
    if clip_dict:
        plot_data = np.clip(
            sample_unclipped, clip_dict['min'], clip_dict['max'])
    else:
        # * Copy so the plotted data never aliases the fitting data
        plot_data = sample_unclipped.copy()
    n_entries = sample_unclipped.shape[0]
    d['data'] = [plot_data]
    d['title'] = key + '- Entries = %.1e' % (n_entries)

    path = hf.get_project_root() + '/reports/plots/features/'
    d['savefig'] = path + particle_code + '_' + key + '.png'
    rpt.make_plot(d)

    # * Fit a transformer/scaler on a single-feature column
    column = sample_unclipped.reshape(-1, 1)
    transformer.fit(column)

    # * Transform plot data
    plot_data_transformed = transformer.transform(column)

    # * Plot and save
    d_transformed = {
        'data': [plot_data_transformed],
        'title': key + ' transformed - Entries = %.1e' % (n_entries),
        'savefig': path + particle_code + '_transformed_' + key + '.png',
    }
    rpt.make_plot(d_transformed)

    return {key: transformer}
Beispiel #2
0
def energy_plot(models, perf_classes, title=None, savefig=None):
    """Plot relative energy curves of several models with a histogram overlay.

    For every (model, performance class) pair the first curve of
    ``get_relE_dict()`` is collected together with the histogram entries of
    ``get_energy_dict()``. The reference curve of the last performance class
    (``bin_edges``, ``relE_crs_sigmas``, ``relE_crs_errors``) is appended
    last and labelled 'Icecube'. The dictionaries returned by the last
    performance class are reused (and modified) as the plot dictionaries.

    Args:
        models: Iterable of curve labels, one per performance class.
        perf_classes: Iterable of performance objects.
        title: Optional title, stored in the histogram plot dictionary.
        savefig: Optional save path, stored in the histogram plot dictionary.

    Returns:
        The result of the second ``rpt.make_plot`` call (histogram overlay).

    Raises:
        ValueError: If no (model, performance class) pair is given.
    """
    pairs = list(zip(models, perf_classes))
    if not pairs:
        raise ValueError(
            'energy_plot: need at least one model and performance class')

    edges, y, yerr, label = [], [], [], []
    data, bins, weights, histtype, log = [], [], [], [], []
    for model, perf in pairs:
        curve_d = perf.get_relE_dict()

        # * Only the first curve of each dictionary is used; the slice
        # * copies it so the collected lists do not alias the source.
        edges.append(curve_d['edges'][0][:])
        y.append(curve_d['y'][0][:])
        yerr.append(curve_d['yerr'][0][:])
        label.append(model)

        hist_d = perf.get_energy_dict()
        data.extend(hist_d['data'])
        bins.extend(hist_d['bins'])
        weights.extend(hist_d['weights'])
        histtype.extend(hist_d['histtype'])
        log.extend(hist_d['log'])
        # * Drop the colour entry; tolerate dictionaries without one
        hist_d.pop('color', None)

    # * Reference curve, taken from the last performance class
    edges.append(perf.bin_edges)
    y.append(perf.relE_crs_sigmas)
    yerr.append(perf.relE_crs_errors)
    label.append('Icecube')

    curve_d['edges'] = edges
    curve_d['y'] = y
    curve_d['yerr'] = yerr
    curve_d['label'] = label
    curve_d['grid'] = True
    curve_d['y_minor_ticks_multiple'] = 0.2

    hist_d['data'] = data
    hist_d['bins'] = bins
    hist_d['weights'] = weights
    hist_d['histtype'] = histtype
    hist_d['log'] = log
    if savefig:
        hist_d['savefig'] = savefig
    if title:
        hist_d['title'] = title

    # * Draw the curves first, then the histograms on the same figure
    fig = rpt.make_plot(curve_d)
    return rpt.make_plot(hist_d, h_figure=fig, axes_index=0)
Beispiel #3
0
def t_plot(models, perf_classes, title=None, savefig=None):
    """Plot the time resolution curves of several models in one figure.

    The curves of ``get_t_dict()`` are collected for every (model,
    performance class) pair. The reference curve of the last performance
    class (``bin_edges``, ``t_crs_sigmas``, ``t_crs_errors``) is appended
    last and labelled 'Icecube', matching the other comparison plots. The
    dictionary returned by the last performance class is reused (and
    modified) as the plot dictionary.

    Args:
        models: Iterable of curve labels, one per performance class.
        perf_classes: Iterable of performance objects.
        title: Optional plot title.
        savefig: Optional path the figure is saved to.

    Returns:
        The result of ``rpt.make_plot``.

    Raises:
        ValueError: If no (model, performance class) pair is given.
    """
    pairs = list(zip(models, perf_classes))
    if not pairs:
        raise ValueError(
            't_plot: need at least one model and performance class')

    edges, y, yerr, label = [], [], [], []
    for model, perf in pairs:
        curve_d = perf.get_t_dict()
        edges.extend(curve_d['edges'])
        y.extend(curve_d['y'])
        yerr.extend(curve_d['yerr'])
        label.append(model)

    # * Reference curve, taken from the last performance class
    edges.append(perf.bin_edges)
    y.append(perf.t_crs_sigmas)
    yerr.append(perf.t_crs_errors)
    label.append('Icecube')

    curve_d['edges'] = edges
    curve_d['y'] = y
    curve_d['yerr'] = yerr
    # * The labels were collected but never handed to the plot before
    curve_d['label'] = label
    curve_d['xlabel'] = r'$\log_{10}$E [E/GeV]'
    curve_d['ylabel'] = r'$\sigma_{t}$ [ns]'
    curve_d['yrange'] = [0, 420]
    if title:
        curve_d['title'] = title
    if savefig:
        curve_d['savefig'] = savefig

    return rpt.make_plot(curve_d)
Beispiel #4
0
def energy_plot(models, perf_classes, title=None, savefig=None):
    """Plot the relative energy error curves of several models.

    For every (model, performance class) pair the first curve of
    ``get_relE_dict()`` is collected. The reference curve of the last
    performance class (``bin_edges``, ``relE_crs_sigmas``,
    ``relE_crs_errors``) is appended last and labelled 'Icecube'. The
    dictionary returned by the last performance class is reused (and
    modified) as the plot dictionary.

    Args:
        models: Iterable of curve labels, one per performance class.
        perf_classes: Iterable of performance objects.
        title: Optional plot title.
        savefig: Optional path the figure is saved to.

    Returns:
        The result of ``rpt.make_plot``.

    Raises:
        ValueError: If no (model, performance class) pair is given.
    """
    pairs = list(zip(models, perf_classes))
    if not pairs:
        raise ValueError(
            'energy_plot: need at least one model and performance class')

    edges, y, yerr, label = [], [], [], []
    for model, perf in pairs:
        curve_d = perf.get_relE_dict()

        # * Only the first curve of each dictionary is used; the slice
        # * copies it so the collected lists do not alias the source.
        edges.append(curve_d['edges'][0][:])
        y.append(curve_d['y'][0][:])
        yerr.append(curve_d['yerr'][0][:])
        label.append(model)

    # * Reference curve, taken from the last performance class
    edges.append(perf.bin_edges)
    y.append(perf.relE_crs_sigmas)
    yerr.append(perf.relE_crs_errors)
    label.append('Icecube')

    curve_d['edges'] = edges
    curve_d['y'] = y
    curve_d['yerr'] = yerr
    # * The labels were collected but never handed to the plot before
    curve_d['label'] = label
    curve_d['xlabel'] = r'$\log_{10}$E [E/GeV]'
    curve_d['ylabel'] = r'Relative Error, $\sigma\left( \left(E_{pred}-E_{true}\right)/E_{true}\right)$'
    if title:
        curve_d['title'] = title
    if savefig:
        curve_d['savefig'] = savefig

    return rpt.make_plot(curve_d)
Beispiel #5
0
def z_plot(models, perf_classes, title=None, savefig=None):
    """Plot the z resolution curves of several models with a histogram overlay.

    The curves of ``get_z_dict()`` and the histogram entries of
    ``get_energy_dict()`` are collected for every (model, performance class)
    pair. The reference curve of the last performance class (``bin_edges``,
    ``z_crs_sigmas``, ``z_crs_errors``) is appended last and labelled
    'Icecube'. The dictionaries returned by the last performance class are
    reused (and modified) as the plot dictionaries.

    Args:
        models: Iterable of curve labels, one per performance class.
        perf_classes: Iterable of performance objects.
        title: Optional title, stored in the histogram plot dictionary.
        savefig: Optional save path, stored in the histogram plot dictionary.

    Returns:
        The result of the second ``rpt.make_plot`` call (histogram overlay).

    Raises:
        ValueError: If no (model, performance class) pair is given.
    """
    pairs = list(zip(models, perf_classes))
    if not pairs:
        raise ValueError(
            'z_plot: need at least one model and performance class')

    edges, y, yerr, label = [], [], [], []
    data, bins, weights, histtype, log = [], [], [], [], []
    for model, perf in pairs:
        curve_d = perf.get_z_dict()
        edges.extend(curve_d['edges'])
        y.extend(curve_d['y'])
        yerr.extend(curve_d['yerr'])
        label.append(model)

        hist_d = perf.get_energy_dict()
        data.extend(hist_d['data'])
        bins.extend(hist_d['bins'])
        weights.extend(hist_d['weights'])
        histtype.extend(hist_d['histtype'])
        log.extend(hist_d['log'])
        # * Drop the colour entry; tolerate dictionaries without one
        hist_d.pop('color', None)

    # * Reference curve, taken from the last performance class
    edges.append(perf.bin_edges)
    y.append(perf.z_crs_sigmas)
    yerr.append(perf.z_crs_errors)
    label.append('Icecube')

    curve_d['edges'] = edges
    curve_d['y'] = y
    curve_d['yerr'] = yerr
    curve_d['label'] = label

    hist_d['data'] = data
    hist_d['bins'] = bins
    hist_d['weights'] = weights
    hist_d['histtype'] = histtype
    hist_d['log'] = log
    if savefig:
        hist_d['savefig'] = savefig
    if title:
        hist_d['title'] = title

    # * Draw the curves first, then the histograms on the same figure
    fig = rpt.make_plot(curve_d)
    return rpt.make_plot(hist_d, h_figure=fig, axes_index=0)
Beispiel #6
0
                )
            else:
                weights, interpolator, savename = make_weights(
                name, ids, db, debug=args.dev, interpolator=interpolator, alpha=args.alpha
            )

            # Save in DB: ids are written as strings next to their weights
            # under the dataset name `savename`.
            ids_strings = [str(idx) for idx in ids]
            print(get_time(), 'Writing %s to database'%(savename))
            db.write('scalar', savename, ids_strings, weights)
            print(get_time(), 'Weights saved!')


        # Save a figure of the weights: the interpolator is evaluated on an
        # evenly spaced grid, [-1, 1] for the direction weights and [0, 3]
        # for every other weight name, and drawn with a log-scaled y-axis.
        if args.make_plot:
            if name == 'uniform_direction_weights':
                x = np.linspace(-1.0, 1.0)
            else:
                x = np.linspace(0.0, 3.0)
            y = interpolator(x)
            d = {'x': [x], 'y': [y]}
            d['savefig'] = '/'.join([get_project_root(), 'reports/plots', savename+'.png'])
            d['yscale'] = 'log'
            _ = make_plot(d)
        
        # Optionally pickle the interpolator itself next to the weights data.
        if args.save_interpolator:
            path = PATH_DATA_OSCNEXT + '/weights/' + savename + '.pickle'
            with open(path, 'wb') as f:
                pickle.dump(interpolator, f)

Beispiel #7
0
        index for index in range(len(lrs)) if from_lr <= lrs[index] <= to_lr
    ]

    # Keep only the learning rates (and matching losses) selected by `indices`.
    chosen_lrs = np.array(lrs)[indices]
    chosen_losses = np.array(losses)[indices]

    # np.inf acts as the "not set" marker for the upper y-limit; None is
    # passed on to the plot in that case.
    if args.max_yrange != np.inf:
        maxy = args.max_yrange
    else:
        # maxy = np.max(chosen_losses)
        maxy = None

    # NOTE(review): the lower limit is also compared against +np.inf -
    # confirm that this matches the default of args.min_yrange.
    if args.min_yrange != np.inf:
        miny = args.min_yrange
    else:
        # miny = np.min(chosen_losses)
        miny = None

    # Plot dictionary: loss against learning rate on a log-scaled x-axis,
    # saved inside the model directory.
    d = {
        'x': [chosen_lrs],
        'y': [chosen_losses],
        'xscale': 'log',
        'savefig': model + '/lr_vs_loss.png',
        'xlabel': 'Learning Rate',
        'ylabel': 'Loss',
        'yrange': {
            'bottom': miny,
            'top': maxy
        }
    }
    fig = make_plot(d)
Beispiel #8
0
    # Load the pickled performance object stored under `path` and collect it.
    # NOTE(review): the file object returned by open() is never closed
    # explicitly here.
    perf_class_path = path + '/data/Performance.pickle'
    perf_class = pickle.load(open(perf_class_path, "rb"))
    perf_classes.append(perf_class)

# attrs = vars(perf_classes[0])
# for attr in attrs:
#     print(attr)

# Only the first loaded performance object is plotted.
perf = perf_classes[0]
for pred_key, reco_key in zip(perf._performance_keys, perf._reco_keys):
    # Main panel: performance dictionary with energy-resolution labelling.
    d = perf._get_perf_dict(pred_key, reco_key)
    d['ylabel'] = 'Energy Resolution [%]'
    d['yrange'] = [-0.05, 1.3]
    d['title'] = 'Energy Regression Performance'

    # NOTE(review): this check does not depend on the loop variables.
    if perf._reco_keys:
        # Upper axes: the performance curves.
        h_fig = rpt.make_plot(d, position=[0.125, 0.26, 0.775, 0.62])
        # Lower axes on the same figure: relative performance with a
        # horizontal line at 0.0.
        d = perf._get_rel_perf_dict(pred_key)
        d['subplot'] = True
        d['axhline'] = [0.0]
        h_fig = rpt.make_plot(d,
                              h_figure=h_fig,
                              position=[0.125, 0.11, 0.775, 0.15])
        # Energy dictionary drawn onto the axes with index 0.
        d_energy = perf._get_energy_dict()
        _ = rpt.make_plot(d_energy, h_figure=h_fig, axes_index=0)

# path = get_project_root() + '/plots/polar_L2_vs_sqr_angle.png'
# title = 'Energy: Stacked 256 LSTM-size Huber (blue), 1028 LSTM L2 (orange)'

# fig = energy_plot(models, perf_classes, title=title)#, savefig=path)
Beispiel #9
0
                    help='Saves figure(s) in root directory',
                    action='store_true')
args = parser.parse_args()

if __name__ == '__main__':

    # * First create plot dictionaries
    plot_dicts = []
    for model in args.inputs:

        # * Locate the model directory: prefer the path whose last component
        # * is the model name, otherwise fall back to the last path found.
        # * Failing loudly on an empty search avoids silently reusing the
        # * path of a previous model.
        paths = list(hf.find_files(model))
        if not paths:
            raise FileNotFoundError(
                'No files found for model %s' % (model))
        path = next(
            (p for p in paths if p.split('/')[-1] == model), paths[-1])

        # * Make a plotting dictionary with the datasets from the different models
        plot_dicts = rprt.get_performance_plot_dicts(path, plot_dicts)

    # * Now display (or save) desired performance plots
    comparisons_dir = hf.get_project_root() + '/comparisons/'
    for plot_dict in plot_dicts:
        if args.save:
            plot_dict['savefig'] = comparisons_dir + plot_dict[
                'title'] + '.png'

        try:
            fig = rprt.make_plot(plot_dict)
        except FileNotFoundError:
            # * Most likely the output directory is missing: create it and
            # * retry once. parents/exist_ok keep an unrelated
            # * FileNotFoundError from being masked by a mkdir failure.
            Path(comparisons_dir).mkdir(parents=True, exist_ok=True)
            fig = rprt.make_plot(plot_dict)
Beispiel #10
0
# Index window covering the first 10 % of the entries.
# NOTE(review): `end` is assigned twice below and not used in the visible code.
FRAC = 0.1
from_, to_ = 0.0, 0.1
end = int(FRAC * len(tot_energy))
from_i, to_i = int(from_ * len(tot_energy)), int(to_ * len(tot_energy))

# Index window covering the last 10 % of the entries.
from2_, to2_ = 0.9, 1.0
end = int(FRAC * len(tot_energy))
from2_i, to2_i = int(from2_ * len(tot_energy)), int(to2_ * len(tot_energy))

# Compare the two windows of the sorted total charge as density histograms.
# NOTE(review): tot_charge_sorted and energy_sorted are read here but only
# assigned further down in this excerpt - presumably they come from an
# earlier cell; confirm the execution order.
path1 = get_project_root() + '/plots/transformed_E_dist.png'
title1 = 'Transformed energy distribution'
d1 = {
    'data': [tot_charge_sorted[from_i:to_i], tot_charge_sorted[from2_i:to2_i]],
    'density': [True, True]
}  #, 'title': title1, 'savefig': path1}
a1 = rpt.make_plot(d1)

# Sorted energies against their running index.
x = np.arange(len(energy_sorted))
d = {'x': [x], 'y': [energy_sorted]}
f = rpt.make_plot(d)

# * MAKING A CUT IN TOT CHARGE
# Pair charge and energy via sort_pairs, convert both to arrays and build a
# boolean mask of the entries whose total charge lies below the cut.
tot_charge_sorted, energy_sorted = sort_pairs(tot_tot_charge, tot_energy)
tot_charge_sorted = np.array(tot_charge_sorted)
energy_sorted = np.array(energy_sorted)
charge_cut = 80.0
indices = tot_charge_sorted < charge_cut

# Energies of the entries that pass the charge cut.
energy_cutted = energy_sorted[indices]
d1 = {
    'data': [energy_sorted[indices], energy_sorted[~indices]],
    'density': [False, False]
Beispiel #11
0
def load_and_fit_transformer(pack):
    """Fit the transformer of one feature on samples from a shelve database.

    Events are read in the order given by ``ids`` until at least ``n_data``
    values are collected or the ids run out. The collected values are
    plotted, used to fit the transformer, and plotted again after the
    transformation. Features without a transformer are returned untouched
    and the database is not opened for them.

    Args:
        pack: Tuple ``(ids, (key, feature_dict), db_path, n_data)``.
            ``feature_dict`` must hold 'transformer' and, when a transformer
            is set, 'feature_calculator' (a callable for derived features or
            a falsy value). An optional 'clip' entry with 'min' and 'max'
            clips the data before the transformed plot is made; the fit
            itself uses unclipped values.

    Returns:
        dict: ``{key: transformer}``.

    Raises:
        ValueError: If a loaded value is neither an array nor a number.
    """
    ids, (key, feature_dict), db_path, n_data = pack

    transformer = feature_dict['transformer']
    clip_d = feature_dict.get('clip', None)

    # * Only features that need to be transformed get a fitted transformer
    if not transformer:
        return {key: transformer}

    # * Extract the function needed for derived features
    fnc = feature_dict['feature_calculator']

    # * Collect the pieces in a list and join them once; the empty float
    # * array keeps the result float64 even if nothing is loaded.
    chunks = [np.array([])]
    loaded = 0
    with shelve.open(db_path, 'r') as db:
        for event_id in ids:
            # * Stop as soon as we have enough samples for the transformer
            if loaded >= n_data:
                break
            event = db[event_id]['raw']

            # * If dealing with a derived feature, calculate it - if not,
            # * just load it
            new_data = fnc(event) if fnc else event[key]

            # * NumPy scalars (e.g. float32, int64) count as numbers too
            if not isinstance(new_data, (np.ndarray, np.number, float, int)):
                raise ValueError('load_and_fit_transformer: Unknown type (%s) encountered'%(type(new_data)))

            chunk = np.ravel(new_data)
            chunks.append(chunk)
            loaded += chunk.size
    data = np.concatenate(chunks)

    # * Save plot of pre-transformed data. exist_ok avoids a failure when
    # * another worker creates the directory at the same time.
    path = get_project_root()+'/reports/shelve_data'
    Path(path).mkdir(parents=True, exist_ok=True)
    plot_d = {'data': [data], 'savefig': path+'/%s.png'%key}
    _ = make_plot(plot_d)

    # * Now fit a transformer
    transformer.fit(data.reshape(-1, 1))

    # * save plot of transformed data
    if clip_d:
        data = np.clip(data, clip_d['min'], clip_d['max'])
    data_transformed = transformer.transform(data.reshape(-1, 1))
    plot_d = {'data': [data_transformed], 'savefig': path+'/%s_transformed.png'%key}
    _ = make_plot(plot_d)

    return {key: transformer}
        # energy_train.extend(energy)

# Collect the number of events stored in each randomly chosen file.
# NOTE(review): n_read is never increased in the visible code (the increment
# is commented out), so the early exit below only triggers when
# n_wanted <= 0.
n_read = 0
for index in rand_val:
    file = get_project_root() + train[index]
    if n_read >= n_wanted:
        break

    with h5.File(file, 'r') as f:
        # energy = f[key]
        # Read the scalar stored under 'meta/events'.
        n_in_file_val.append(f['meta/events'][()])
        # n_read += len(energy)
        # energy_val.extend(energy)

# d = {'data': [energy_train, energy_val]}
# fig = rpt.make_plot(d)

# Density histograms of the events-per-file counts of both file lists,
# saved under the project's plots directory.
title = 'Distribution of number of events in files'
path_save = get_project_root() + '/plots/n_events_in_files.png'
d = {
    'data': [n_in_file_train, n_in_file_val],
    'density': [True, True],
    'title': title,
    'label': ['Train', 'Val'],
    'savefig': path_save
}
fig2 = rpt.make_plot(d)

# d = {'x': [np.arange(len(energy_train))], 'y': [energy_train]}
# train_fig = rpt.make_plot(d_train)
# tot_n_doms = [entry for entry in tot_n_doms if entry<100]
Beispiel #13
0
# from src.modules.classes import *
import src.modules.loss_funcs as lf
from src.modules.helper_functions import *
from src.modules.eval_funcs import *
import src.modules.reporting as rpt

# Particle type and dataset directory the energies are read from.
particle = 'muon_neutrino'
dataset = get_project_root()+get_path_from_root('/CubeML/data/oscnext-genie-level5-v01-01-pass2')

# Gather the true primary energy of every matching file. With
# events_wanted = np.inf the early exit never triggers, so all files in the
# directory are visited.
tot_energy = []
events_wanted = np.inf
events_loaded = 0
for file in Path(dataset).iterdir():
    if events_loaded >= events_wanted:
        break
    # Skip anything that is not an .h5 file accepted by
    # confirm_particle_type for the chosen particle.
    if not (file.suffix == '.h5' and confirm_particle_type(get_particle_code(particle), file)):
        continue
     
    with h5.File(file, 'r') as f:
        energy = f['raw/true_primary_energy']
        events_loaded += len(energy)
        tot_energy.extend(energy)

# tot_n_doms = [entry for entry in tot_n_doms if entry<100]
# %%

# Histogram of the collected energies; title and save path are prepared but
# not passed to the plot (they are commented out in the dictionary).
path1 = get_project_root() + '/plots/transformed_E_dist.png'
title1 = 'Transformed energy distribution'
d1 = {'data': [tot_energy]}#, 'title': title1, 'savefig': path1}
a1 = rpt.make_plot(d1)
Beispiel #14
0
        tot_n_doms.extend(n_doms)
        tot_energy.extend(energy)
# Pair sequence lengths and energies via sort_pairs.
# presumably sorted by sequence length - verify against sort_pairs
doms, energy = sort_pairs(tot_n_doms, tot_energy)

# tot_n_doms = [entry for entry in tot_n_doms if entry<100]
# %%
# `end` is the number of entries in a FRAC-sized slice; from_i/to_i select
# the first 90 % of the entries.
FRAC = 0.1
from_, to_ = 0.0, 0.9
end = int(FRAC * len(doms))
from_i, to_i = int(from_ * len(doms)), int(to_ * len(doms))

# Energy histograms of the first `end` entries and of the last `end`
# entries without the final 50.
# NOTE(review): when `end` is 0 the second slice becomes energy[0:-50],
# because -0 equals 0 - confirm the input is always large enough.
path1 = get_project_root() + '/plots/%s_energy_vs_seqlen.png' % (particle)
title1 = '%s: Bottom and upper %.0f %% seq. length log(e) dist' % (particle,
                                                                   FRAC * 100)
d1 = {
    'data': [energy[0:end], energy[-end:-50]],
    'title': title1,
    'savefig': path1
}
a1 = rpt.make_plot(d1)

# Histogram of the selected sequence lengths; the title reports how many
# entries were used.
path2 = get_project_root() + '/plots/%s_seqlen.png' % (particle)
title2 = '%s: Seq. length dist (entries: %.2e)' % (particle,
                                                   len(doms[from_i:to_i]))
d2 = {'data': [doms[from_i:to_i]], 'title': title2, 'savefig': path2}
a2 = rpt.make_plot(d2)

# %%
# Fraction of entries with a sequence length above 200.
# NOTE(review): raises ZeroDivisionError when doms is empty.
morethan200 = len([entry for entry in doms if entry > 200])
print(morethan200 / len(doms))