Example #1
import pandas as pd
from master_scripts.data_functions import get_git_root  # assumed module path
# load_experiment (used below) is the helper defined in Example #5.


def load_hparam_search(name):
    """ Reads json-formatted hparam search file spec to pandas DF,
    and loads additional metrics into the dataframe.
    """
    hpath = get_git_root() + "experiments/searches/"
    df = pd.read_json(hpath + name,
                      orient='index').rename_axis('id').reset_index()
    # JSON deserialization turns the tuples in the hparam search into lists;
    # convert the values to str so they are hashable and easy to work with.
    df['kernel_size'] = [str(x) for x in df['kernel_size'].values]
    # Add additional metrics to df
    accs = []
    f1 = []
    mcc = []
    auc = []
    for e_id in df['id']:
        e = load_experiment(e_id)
        accs.append(e['metrics']['accuracy_score'])
        f1.append(e['metrics']['f1_score'])
        mcc.append(e['metrics']['matthews_corrcoef'])
        auc.append(e['metrics']['roc_auc_score'])
    df['accuracy_score'] = accs
    df['f1_score'] = f1
    df['matthews_corrcoef'] = mcc
    df['roc_auc_score'] = auc
    return df
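A usage sketch; the search-spec filename here is hypothetical:

df = load_hparam_search("cnn_search.json")  # hypothetical spec filename
print(df.sort_values("f1_score", ascending=False).head())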
Example #2
def set_config(self, config):
    """ Set default config for model.fit and additional kfold cross-
    validation arguments. If a config is provided to the function,
    replace the given keys with the ones provided, and add any new keys.
    """
    # Get the git repo root path
    rpath = get_git_root()
    self.config = {
        'fit_args': {
            'batch_size': None,
            'epochs': 1,
            'verbose': 2,
        },
        'kfold_args': {
            'n_splits': 5,
            'shuffle': True,
        },
        'path_args': {
            'repo_root': rpath,
            'models': rpath + 'models/',
            'figures': rpath + 'figures/',
            'experiments': rpath + 'experiments/',
            'results': rpath + 'results/',  # trailing slash added for consistency
            'model_config': rpath + 'experiments/model_config/',
        },
        'random_seed': None,
    }
    if config is not None:
        for major_key in config.keys():
            # Treat the value as a dict of sub-keys to merge into the
            # defaults; store it directly if it is not a dict.
            try:
                for k, v in config[major_key].items():
                    self.config[major_key][k] = v
            except (AttributeError, KeyError):
                self.config[major_key] = config[major_key]
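The merge behavior can be seen in isolation; a minimal sketch applying the same try/except loop to plain dicts, with made-up values:

defaults = {'fit_args': {'epochs': 1, 'verbose': 2}, 'random_seed': None}
override = {'fit_args': {'epochs': 20}, 'random_seed': 42}
for major_key in override:
    try:
        # Dict-valued entries are merged key by key.
        for k, v in override[major_key].items():
            defaults[major_key][k] = v
    except (AttributeError, KeyError):
        # Non-dict (or new) entries replace the default outright.
        defaults[major_key] = override[major_key]
print(defaults)  # {'fit_args': {'epochs': 20, 'verbose': 2}, 'random_seed': 42}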
Example #3
from master_scripts.data_functions import (get_git_root, get_tf_device,
                                           event_indices)  # assumed module path
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
import tensorflow as tf
import numpy as np
import json
import warnings

warnings.filterwarnings('ignore', category=FutureWarning)

# ================== Config =======================
with open("results_experiment_config.json", 'r') as fp:
    config = json.load(fp)

# ================== Callbacks ====================

# ================== Import Data ==================
DATA_PATH = get_git_root() + "data/simulated/"
images = np.load(DATA_PATH + config['data']['images'])
images = images.reshape(images.shape[0], 256)
energies = np.load(DATA_PATH + config['data']['energies'])
positions = np.load(DATA_PATH + config['data']['positions'])

single_indices, double_indices, close_indices = event_indices(positions)
# Log-scale the images if desirable.
# NOTE: this hard-coded override forces "minmax" scaling, so the log
# branch below is never taken as written.
config['scaling'] = "minmax"
if "np.log" in config['scaling']:
    images = np.log1p(images)

# set tf random seed
tf.random.set_seed(config['random_seed'])
# ================== Model ========================
with tf.device(get_tf_device(20)):
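    # The body of this block is truncated in the source example.
    # A minimal sketch, assuming a simple Dense classifier on the
    # flattened 256-pixel images (NOT the original architecture):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(256,)),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    # Training targets are not shown in the source, so model.fit is omitted.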
Example #4
import sys
sys.path.append('../../master_scripts')
from master_scripts.noise_addition import *
import numpy as np
from master_scripts.data_functions import get_git_root
import json
import time

#config = {
#    'distfile': "ratiodist.txt",
#    'imagefile': ["images_training_18000000_202012080020.npy","images_test_2000000_202012080020.npy"]
#}

with open(sys.argv[1], 'r') as fp:
    config = json.load(fp)

DATA_PATH = get_git_root() + "data/simulated/"
DIST_PATH = get_git_root() + "data/real/"

for data in config['imagefile']:
    print("Noising", data, "- this may take some time.")
    start = time.time()
    images = np.load(DATA_PATH + data).copy()
    dist = gen_dist(DIST_PATH + config['distfile'])
    for i in range(len(images)):
        # Multiply each 16x16 image elementwise by a noise field sampled
        # from the measured ratio distribution.
        images[i] = images[i].reshape(16, 16) * rnoise_gen(dist).reshape(16, 16)
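    # The source example stops here; the timing report and save step below
    # are hedged completions, and the "_noised" filename suffix is an
    # assumption, not part of the original code.
    print("Done in {:.1f} s".format(time.time() - start))
    np.save(DATA_PATH + data.replace(".npy", "_noised.npy"), images)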
Example #5
import json

from master_scripts.data_functions import get_git_root  # assumed module path


def load_experiment(e_id):
    """Load a stored experiment's JSON record by its id."""
    repo_root = get_git_root()
    e_path = repo_root + "experiments/"
    with open(e_path + e_id + ".json", "r") as fp:
        e = json.load(fp)
    return e
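A usage sketch; the experiment id is hypothetical, and the 'metrics' key follows the structure shown in Example #1:

e = load_experiment("a1b2c3")  # hypothetical experiment id
print(e['metrics']['accuracy_score'])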
Example #6
import json

from master_scripts.data_functions import get_git_root  # assumed module path


def anodedata_classification_table(experiment_id,
                                   data_name,
                                   return_events=False):
    """Outputs the event dict post-classification as a table

    :param experiment_id:   unique id of experiment
    :param data_name:   filename of datafile without type suffix
                        ex. "anodedata_500k"
    :param return_events: bool, return the events dict if True
    """
    # Load the event classification results
    repo_root = get_git_root()
    fname = repo_root + "results/events_classified_" + data_name + "_"
    fname += experiment_id + ".json"
    with open(fname, "r") as fp:
        events = json.load(fp)

    # Generate list of unique event descriptors present in the events
    descriptors = list(
        set([event['event_descriptor'] for event in events.values()]))

    # Frequency of each type of descriptor for each event type
    desc_class = {
        'single': [],
        'double': [],
    }
    for event in events.values():
        desc_class[event['event_class']].append(event['event_descriptor'])

    # Translation dict for event descriptors.
    # Note that not all of these descriptor values correspond to
    # combinations that can actually occur.
    translate_descriptor = {
        1: "Implant",
        2: "Decay",
        3: "implant + Decay",
        4: "Light ion",
        5: "Implant + Light Ion",
        6: "Decay + Light Ion",
        7: "Implant + Decay + Light Ion",
        8: "Double (time)",
        9: "Implant + Double (time)",
        10: "Decay + Double (time)",
        11: "Implant + Decay + Double (time)",
        12: "Light ion + Double (time)",
        13: "Implant + Light Ion + Double (time)",
        14: "Decay + Light ion + Double (time)",
        15: "Implant + Decay + Light Ion + Double (time)",
        16: "Double (space)",
        17: "Implant + Double (space)",
        18: "Decay + Double (space)"
    }

    # Print a table-like structure for viewing
    print("Classification results for {}:".format(experiment_id))
    print(
        "|Event descriptor | Event type                   | singles | doubles |"
    )
    print(
        "| :---            |  :---:                       | :---:   | :---:   |"
    )
    for d in descriptors:
        print("|{:^17d}|{:^30s}|{:^9d}|{:^9d}|".format(
            d, translate_descriptor[d], desc_class['single'].count(d),
            desc_class['double'].count(d)))

    if return_events:
        return events
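A usage sketch; the experiment id is hypothetical, and the data name follows the docstring's example:

events = anodedata_classification_table(
    "a1b2c3",           # hypothetical experiment id
    "anodedata_500k",   # example data_name from the docstring
    return_events=True,
)
print(len(events), "classified events")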
Example #7
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from master_scripts.data_functions import get_git_root  # assumed module path
import json
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# ================== Config =======================
config = {
    'fit_args': {
        'epochs': 20,
        'batch_size': 32,
    },
    'random_seed': 120,
}

# ================== Import Data ==================
DATA_PATH = get_git_root() + "data/simulated/"
images = np.load(DATA_PATH + "images_200k.npy")
images = images.reshape(images.shape[0], 16, 16, 1)
labels = np.load(DATA_PATH + "labels_200k.npy")

x_idx = np.arange(images.shape[0])
train_idx, val_idx, _, _ = train_test_split(
    x_idx, x_idx, random_state=config['random_seed'])

# ================== Search params ================
batch_sizes = [32, 64, 128, 256]

# set tf random seed
tf.random.set_seed(config['random_seed'])
id_param = {}
search_name = "batch_size_deeper"
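The search loop itself is truncated in the source. A minimal sketch of how batch_sizes, id_param, and search_name might be used; the stand-in model (assuming binary labels) and the output filename are assumptions, not the original code:

for bs in batch_sizes:
    config['fit_args']['batch_size'] = bs
    # Hypothetical stand-in model; the original "deeper" architecture
    # is not shown in the source.
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(16, 16, 1)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(images[train_idx], labels[train_idx],
              validation_data=(images[val_idx], labels[val_idx]),
              **config['fit_args'])
    id_param["bs_{}".format(bs)] = {'batch_size': bs}

# Persist the id-to-parameter mapping (filename is an assumption).
with open(search_name + "_params.json", "w") as fp:
    json.dump(id_param, fp, indent=2)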
Example #8
# ============================================================================
# Modifies pixels in the simulated detector images to make them more
# similar to the real data.
# In anodedata_500k.txt, pixel (3, 13) is set to 0 because it is
# unreliably noisy; the same applies to the border rows y=0 and y=15.
# ============================================================================
import numpy as np
from master_scripts.data_functions import get_git_root

repo_root = get_git_root()

images = np.load(repo_root + "data/simulated/images_full.npy")

images[:, 3, 13] = 0
images[:, 0, :] = 0
images[:, 15, :] = 0

np.save(repo_root + "data/simulated/images_full_pixelmod.npy", images)
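A quick sanity check that the saved file has the expected pixels zeroed:

mod = np.load(repo_root + "data/simulated/images_full_pixelmod.npy")
assert not mod[:, 3, 13].any()  # noisy pixel cleared
assert not mod[:, 0, :].any()   # y=0 border row cleared
assert not mod[:, 15, :].any()  # y=15 border row cleared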