print("Haven't seen any new events in %i files, quitting..."%quit_files) break if np.all(count_energy >= max_per_bin): print("All bins filled, quitting...") break else: print(count_energy) if shuffle == True: print("Finished concatonating all the files. Now I will shuffle..") from handle_data import Shuffler full_features_DC, full_features_IC, full_labels, \ full_reco, full_stats, full_pulses_per_dom, full_trig_times = \ Shuffler(full_features_DC,full_features_IC,full_labels, \ full_reco=full_reco, full_initial_stats=full_stats, \ full_num_pulses=full_pulses_per_dom, full_trig_times=full_trig_times, \ use_old_reco_flag=use_old_reco) over_emax = full_labels[:,0] > emax under_emin = full_labels[:,0] < emin assert sum(over_emax)==0, "Have events greater than emax in final sample" assert sum(under_emin)==0, "Have events less than emin in final sample" if cut_name == "CC": isCC = full_labels[:,11] == 1 assert sum(isCC)==full_labels.shape[0], "Have NC events in data" #Save output to hdf5 file print(count_energy) print("Total events saved: %i"%full_features_DC.shape[0]) events_per_file = int(full_features_DC.shape[0]/num_outputs) + 1 for sep_file in range(0,num_outputs):
else: full_num_pulses = np.concatenate( (full_num_pulses, file_num_pulses[keep_index])) print( "Events this file: %i, Saved this file: %i, Cumulative saved: %i\n Finsihed file: %s" % (number_events, np.count_nonzero(keep_index), full_labels.shape[0], a_file)) if shuffle: print("Finished concatonating all the files. Now I will shuffle..") from handle_data import Shuffler shuffled_features_DC, shuffled_features_IC, shuffled_labels, \ shuffled_reco, shuffled_initial_stats, shuffled_num_pulses = \ Shuffler(full_features_DC,full_features_IC,full_labels, \ full_reco, full_initial_stats,full_num_pulses,use_old_reco_flag=use_old_reco) else: shuffled_features_DC, shuffled_features_IC, shuffled_labels, \ shuffled_reco, shuffled_initial_stats, shuffled_num_pulses = \ full_features_DC,full_features_IC,full_labels, \ full_reco, full_initial_stats,full_num_pulses if find_quartiles: from get_statistics import GetQuartilesList from scaler_transformations import new_transform #low_stat = q1, high_stat = max low_stat_DC, high_stat_DC = GetQuartilesList(full_features_DC) low_stat_DC = new_tranform(low_stat_DC) high_stat_DC = new_tranform(high_stat_DC) low_stat_IC, high_stat_IC = GetQuartilesList(full_features_IC) low_stat_IC = new_tranform(low_stat_IC)
# NOTE(review): this section was recovered from whitespace-collapsed text. The
# nesting below is reconstructed from the statements themselves; the enclosing
# scope (if any) is not visible here -- confirm indentation.
# Keep only the entries selected by keep_index.
features_IC = np.array(features_IC)[keep_index]
labels = np.array(labels)[keep_index]
if use_old_reco:
    old_reco = np.array(old_reco)[keep_index]
print("Keeping %i events" % number_events)
print(features_DC.shape)

#Shuffle Option
if shuffle:
    print("Starting shuffle...")
    from handle_data import Shuffler

    # All arrays go through one Shuffler call and are rebound from its result.
    (
        features_DC,
        features_IC,
        labels,
        old_reco,
        initial_stats,
        num_pulses,
    ) = Shuffler(
        features_DC,
        features_IC,
        labels,
        old_reco,
        initial_stats,
        num_pulses,
        use_old_reco_flag=use_old_reco,
    )
    print("Finished shuffling...")

#Transform Input Data
print("Starting transformation of input features...")
from scaler_transformations import TransformData, new_transform

# Two-stage DC transform: new_transform first, then TransformData with the DC
# low/high statistics. Each intermediate is deleted as soon as it has been
# consumed (presumably to limit peak memory).
features_DC_partial_transform = new_transform(features_DC)
del features_DC
features_DC_full_transform = TransformData(
    features_DC_partial_transform,
    low_stats=low_stat_DC,
    high_stats=high_stat_DC,
    scaler=transform,
)
del features_DC_partial_transform
print("Finished DC")