def load_hparam_search(name):
    """ Reads a json-formatted hparam search file spec into a pandas
    DataFrame, and loads additional metrics for each experiment into the
    dataframe.
    """
    hpath = get_git_root() + "experiments/searches/"
    df = pd.read_json(hpath + name, orient='index').rename_axis('id').reset_index()

    # JSON converts the tuples in the hparam search to lists when parsed.
    # Convert the values to str to make them workable.
    df['kernel_size'] = [str(x) for x in df['kernel_size'].values]

    # Add additional metrics to df
    accs = []
    f1 = []
    mcc = []
    auc = []
    for e_id in df['id']:
        e = load_experiment(e_id)
        accs.append(e['metrics']['accuracy_score'])
        f1.append(e['metrics']['f1_score'])
        mcc.append(e['metrics']['matthews_corrcoef'])
        auc.append(e['metrics']['roc_auc_score'])
    df['accuracy_score'] = accs
    df['f1_score'] = f1
    df['matthews_corrcoef'] = mcc
    df['roc_auc_score'] = auc
    return df
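# Usage sketch for load_hparam_search (the search filename below is
# hypothetical; any spec saved under experiments/searches/ works):
#
#     df = load_hparam_search("kernel_size_search.json")
#     # Rank configurations by F1 score to inspect the best hyperparameters
#     print(df.sort_values('f1_score', ascending=False).head())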
def set_config(self, config):
    """ Set default config for model.fit and additional kfold cross-
    validation arguments. If a config is provided to the function,
    replace the given keys with the ones provided, and add any new keys
    that are present.
    """
    # Get github repo root path
    rpath = get_git_root()
    self.config = {
        'fit_args': {
            'batch_size': None,
            'epochs': 1,
            'verbose': 2,
        },
        'kfold_args': {
            'n_splits': 5,
            'shuffle': True,
        },
        'path_args': {
            'repo_root': rpath,
            'models': rpath + 'models/',
            'figures': rpath + 'figures/',
            'experiments': rpath + 'experiments/',
            'results': rpath + 'results/',
            'model_config': rpath + 'experiments/model_config/',
        },
        'random_seed': None,
    }
    if config is not None:
        for major_key in config.keys():
            # Treat the value as a dict and merge it key by key;
            # if it isn't a dict (or the default lacks the key),
            # save it directly.
            try:
                for k, v in config[major_key].items():
                    self.config[major_key][k] = v
            except (AttributeError, KeyError):
                self.config[major_key] = config[major_key]
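# Usage sketch for set_config (assumes an instance of the class that owns
# this method; the instance name is hypothetical):
#
#     exp.set_config({'fit_args': {'epochs': 10}, 'random_seed': 42})
#     # 'epochs' is overridden while the other fit_args defaults are kept;
#     # 'random_seed' is not a dict, so it is replaced wholesale.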
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
import tensorflow as tf
import numpy as np
import json
import warnings
# get_git_root, event_indices, and get_tf_device are assumed to live in
# master_scripts.data_functions, as in the other scripts in this repo.
from master_scripts.data_functions import (get_git_root, event_indices,
                                           get_tf_device)
warnings.filterwarnings('ignore', category=FutureWarning)

# ================== Config =======================
with open("results_experiment_config.json", 'r') as fp:
    config = json.load(fp)

# ================== Callbacks ====================

# ================== Import Data ==================
DATA_PATH = get_git_root() + "data/simulated/"
images = np.load(DATA_PATH + config['data']['images'])
images = images.reshape(images.shape[0], 256)
energies = np.load(DATA_PATH + config['data']['energies'])
positions = np.load(DATA_PATH + config['data']['positions'])
single_indices, double_indices, close_indices = event_indices(positions)

# log-scale the images if desired
if "np.log" in config['scaling']:
    images = np.log1p(images)

# set tf random seed
tf.random.set_seed(config['random_seed'])

# ================== Model ========================
with tf.device(get_tf_device(20)):
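# Sketch of the expected shape of results_experiment_config.json (keys
# inferred from how config is used above; the values shown are examples,
# not the actual file contents):
#
#     {
#         "data": {
#             "images": "images_full.npy",
#             "energies": "energies_full.npy",
#             "positions": "positions_full.npy"
#         },
#         "scaling": "np.log1p",
#         "random_seed": 120
#     }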
import sys
sys.path.append('../../master_scripts')
from master_scripts.noise_addition import *
import numpy as np
from master_scripts.data_functions import get_git_root
import json
import time

# Example config (see the usage sketch below for the JSON equivalent):
# config = {
#     'distfile': "ratiodist.txt",
#     'imagefile': ["images_training_18000000_202012080020.npy",
#                   "images_test_2000000_202012080020.npy"]
# }
with open(sys.argv[1], 'r') as fp:
    config = json.load(fp)

DATA_PATH = get_git_root() + "data/simulated/"
DIST_PATH = get_git_root() + "data/real/"

for data in config['imagefile']:
    print("Noising", data, "- this may take some time.")
    start = time.time()
    images = np.load(DATA_PATH + data).copy()
    dist = gen_dist(DIST_PATH + config['distfile'])
    # Multiply each image elementwise by a freshly drawn 16x16 noise field.
    for i in range(0, len(images)):
        images[i] = images[i].reshape(16, 16) * rnoise_gen(dist).reshape(16, 16)
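# Usage sketch (the script and config filenames are hypothetical):
#
#     python add_noise.py noise_config.json
#
# where noise_config.json mirrors the commented-out example above:
#
#     {
#         "distfile": "ratiodist.txt",
#         "imagefile": ["images_training_18000000_202012080020.npy",
#                       "images_test_2000000_202012080020.npy"]
#     }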
def load_experiment(e_id):
    """ Load a stored experiment json file by its id. """
    repo_root = get_git_root()
    e_path = repo_root + "experiments/"
    with open(e_path + e_id + ".json", "r") as fp:
        e = json.load(fp)
    return e
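# Usage sketch (the experiment id is hypothetical; ids are the filenames
# stored under experiments/ without the .json suffix):
#
#     e = load_experiment("20201208-abc123")
#     print(e['metrics']['f1_score'])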
def anodedata_classification_table(experiment_id, data_name,
                                   return_events=False):
    """Outputs the event dict post-classification as a table

    :param experiment_id: unique id of experiment
    :param data_name: filename of datafile without type suffix,
        e.g. "anodedata_500k"
    :param return_events: bool, return the events dict if True
    """
    # Load the event classification results
    repo_root = get_git_root()
    fname = repo_root + "results/events_classified_" + data_name + "_"
    fname += experiment_id + ".json"
    with open(fname, "r") as fp:
        events = json.load(fp)

    # Generate list of unique event descriptors present in the events
    descriptors = list(
        set([event['event_descriptor'] for event in events.values()]))

    # Frequency of each type of descriptor for each event type
    desc_class = {
        'single': [],
        'double': [],
    }
    for event in events.values():
        desc_class[event['event_class']].append(event['event_descriptor'])

    # Translation dict for event descriptor.
    # Note that not all of these combinations correspond to events that can
    # actually occur.
    translate_descriptor = {
        1: "Implant",
        2: "Decay",
        3: "Implant + Decay",
        4: "Light ion",
        5: "Implant + Light Ion",
        6: "Decay + Light Ion",
        7: "Implant + Decay + Light Ion",
        8: "Double (time)",
        9: "Implant + Double (time)",
        10: "Decay + Double (time)",
        11: "Implant + Decay + Double (time)",
        12: "Light ion + Double (time)",
        13: "Implant + Light Ion + Double (time)",
        14: "Decay + Light ion + Double (time)",
        15: "Implant + Decay + Light Ion + Double (time)",
        16: "Double (space)",
        17: "Implant + Double (space)",
        18: "Decay + Double (space)"
    }

    # Print a markdown table-like structure for viewing
    print("Classification results for {}:".format(experiment_id))
    print("|Event descriptor | Event type | singles | doubles |")
    print("| :--- | :---: | :---: | :---: |")
    for d in descriptors:
        print("|{:^17d}|{:^30s}|{:^9d}|{:^9d}|".format(
            d, translate_descriptor[d],
            desc_class['single'].count(d),
            desc_class['double'].count(d)))

    if return_events:
        return events
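# Usage sketch (the experiment id is hypothetical; the data name follows
# the docstring example):
#
#     events = anodedata_classification_table(
#         "20201208-abc123", "anodedata_500k", return_events=True)
#     # Prints a markdown table of descriptor frequencies per event class
#     # and returns the classified events dict for further inspection.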
import tensorflow as tf
from sklearn.model_selection import train_test_split
# get_git_root is assumed to live in master_scripts.data_functions, as in
# the other scripts in this repo.
from master_scripts.data_functions import get_git_root
import numpy as np
import json
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# ================== Config =======================
config = {
    'fit_args': {
        'epochs': 20,
        'batch_size': 32,
    },
    'random_seed': 120,
}

# ================== Import Data ==================
DATA_PATH = get_git_root() + "data/simulated/"
images = np.load(DATA_PATH + "images_200k.npy")
images = images.reshape(images.shape[0], 16, 16, 1)
labels = np.load(DATA_PATH + "labels_200k.npy")

x_idx = np.arange(images.shape[0])
# Only the index arrays are needed; the last two outputs duplicate them
# and are unused.
train_idx, val_idx, u1, u2 = train_test_split(
    x_idx, x_idx, random_state=config['random_seed'])

# ================== Search params ================
batch_sizes = [32, 64, 128, 256]

# set tf random seed
tf.random.set_seed(config['random_seed'])

id_param = {}
search_name = "batch_size_deeper"
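# Sketch of how the split indices select data (variable names are
# illustrative, not from the original script):
#
#     x_train, y_train = images[train_idx], labels[train_idx]
#     x_val, y_val = images[val_idx], labels[val_idx]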
# ============================================================================
# Modifies pixels in the simulated detector images in order to make them more
# similar to the real data.
# In anodedata_500k.txt, pixel (3, 13) is set to 0 due to being unreliably
# noisy. The same is the case for the bordering pixels y=0 and y=15.
# ============================================================================
import numpy as np
from master_scripts.data_functions import get_git_root

repo_root = get_git_root()
images = np.load(repo_root + "data/simulated/images_full.npy")

# Zero out the unreliable pixel and the bordering rows
images[:, 3, 13] = 0
images[:, 0, :] = 0
images[:, 15, :] = 0

np.save(repo_root + "data/simulated/images_full_pixelmod.npy", images)
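# Quick sanity-check sketch (run after the save above; verifies the masked
# pixels are zeroed in the saved file):
#
#     mod = np.load(repo_root + "data/simulated/images_full_pixelmod.npy")
#     assert (mod[:, 3, 13] == 0).all()
#     assert (mod[:, 0, :] == 0).all() and (mod[:, 15, :] == 0).all()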