Esempio n. 1
0
        print("Haven't seen any new events in %i files, quitting..."%quit_files)
        break

    # Leave the enclosing loop (header not shown here) once every entry of
    # count_energy has reached max_per_bin.
    if np.all(count_energy >= max_per_bin):
        print("All bins filled, quitting...")
        break
    else:
        # Not all entries are at the cap yet: report the current counts and
        # carry on.
        print(count_energy)

# Optionally shuffle the concatenated arrays, then sanity-check the sample.
# NOTE(review): as in the original layout, the energy-range and CC checks only
# run when shuffling is enabled -- confirm that is intended.
if shuffle:
    print("Finished concatonating all the files. Now I will shuffle..")
    from handle_data import Shuffler

    # The seven return values are unpacked in the same order the inputs are
    # passed; presumably Shuffler permutes them consistently -- confirm
    # against handle_data.
    (
        full_features_DC,
        full_features_IC,
        full_labels,
        full_reco,
        full_stats,
        full_pulses_per_dom,
        full_trig_times,
    ) = Shuffler(
        full_features_DC,
        full_features_IC,
        full_labels,
        full_reco=full_reco,
        full_initial_stats=full_stats,
        full_num_pulses=full_pulses_per_dom,
        full_trig_times=full_trig_times,
        use_old_reco_flag=use_old_reco,
    )

    # Column 0 of the labels is checked against the [emin, emax] bounds.
    # The boolean masks are reduced with .any()/.all() rather than the builtin
    # sum(), which would iterate the array element by element in Python.
    over_emax = full_labels[:, 0] > emax
    under_emin = full_labels[:, 0] < emin
    assert not over_emax.any(), "Have events greater than emax in final sample"
    assert not under_emin.any(), "Have events less than emin in final sample"
    if cut_name == "CC":
        # With the CC cut, every row must have label column 11 equal to 1.
        isCC = full_labels[:, 11] == 1
        assert isCC.all(), "Have NC events in data"

# Save output to hdf5 file
# Report the final counts and the total number of saved events (rows of
# full_features_DC) before writing.
print(count_energy)
print("Total events saved: %i" % (full_features_DC.shape[0],))
# Truncated quotient plus one, so events_per_file * num_outputs always
# exceeds the total number of events.
events_per_file = 1 + int(full_features_DC.shape[0] / num_outputs)
for sep_file in range(0,num_outputs):
        else:
            full_num_pulses = np.concatenate(
                (full_num_pulses, file_num_pulses[keep_index]))

    # Per-file progress report: events in this file, how many of them
    # keep_index selects, the running total of saved rows in full_labels, and
    # the file that was just processed.
    print(
        "Events this file: %i, Saved this file: %i, Cumulative saved: %i\n Finsihed file: %s"
        % (number_events, np.count_nonzero(keep_index), full_labels.shape[0],
           a_file))

# Produce the shuffled_* arrays: either by running Shuffler over the
# concatenated full_* arrays, or by aliasing them unchanged when shuffling is
# switched off.
if shuffle:
    print("Finished concatonating all the files. Now I will shuffle..")
    from handle_data import Shuffler

    (
        shuffled_features_DC,
        shuffled_features_IC,
        shuffled_labels,
        shuffled_reco,
        shuffled_initial_stats,
        shuffled_num_pulses,
    ) = Shuffler(
        full_features_DC,
        full_features_IC,
        full_labels,
        full_reco,
        full_initial_stats,
        full_num_pulses,
        use_old_reco_flag=use_old_reco,
    )
else:
    # No copy is made here: the shuffled_* names refer to the same objects.
    shuffled_features_DC = full_features_DC
    shuffled_features_IC = full_features_IC
    shuffled_labels = full_labels
    shuffled_reco = full_reco
    shuffled_initial_stats = full_initial_stats
    shuffled_num_pulses = full_num_pulses

# Optionally derive the low/high scaling statistics from the data itself.
if find_quartiles:
    from get_statistics import GetQuartilesList
    from scaler_transformations import new_transform

    # low_stat = q1, high_stat = max
    # The statistics are passed through the imported new_transform helper
    # (the previous spelling "new_tranform" was undefined and raised
    # NameError). Presumably this keeps them in the same space as the
    # transformed features -- confirm against scaler_transformations.
    low_stat_DC, high_stat_DC = GetQuartilesList(full_features_DC)
    low_stat_DC = new_transform(low_stat_DC)
    high_stat_DC = new_transform(high_stat_DC)
    low_stat_IC, high_stat_IC = GetQuartilesList(full_features_IC)
    low_stat_IC = new_transform(low_stat_IC)
Esempio n. 3
0
    # Convert to NumPy arrays and keep only the entries selected by
    # keep_index.
    features_IC = np.array(features_IC)[keep_index]
    labels = np.array(labels)[keep_index]
    if use_old_reco:
        # old_reco is filtered the same way, but only when use_old_reco is set.
        old_reco = np.array(old_reco)[keep_index]

    # Report the event count and the shape of the DC feature array.
    print("Keeping %i events" % (number_events))
    print(features_DC.shape)

# Shuffle option: when enabled, replace every array with the corresponding
# output of Shuffler.
if shuffle:
    print("Starting shuffle...")
    from handle_data import Shuffler

    (
        features_DC,
        features_IC,
        labels,
        old_reco,
        initial_stats,
        num_pulses,
    ) = Shuffler(
        features_DC,
        features_IC,
        labels,
        old_reco,
        initial_stats,
        num_pulses,
        use_old_reco_flag=use_old_reco,
    )

    print("Finished shuffling...")

# Transform input data: the DC features go through new_transform first and
# then through TransformData with the low/high statistics and the selected
# scaler.
print("Starting transformation of input features...")
from scaler_transformations import TransformData, new_transform

features_DC_partial_transform = new_transform(features_DC)
# Drop the untransformed array as soon as it is no longer needed
# (presumably to limit peak memory -- these arrays can be large).
del features_DC
features_DC_full_transform = TransformData(features_DC_partial_transform,
                                           low_stats=low_stat_DC,
                                           high_stats=high_stat_DC,
                                           scaler=transform)
# The intermediate result is likewise released once the full transform exists.
del features_DC_partial_transform
print("Finished DC")