Example #1
    def run(self, x, y):
        """ Single training run of the given model. It is assumed that the
        input data is preprocessed, normalized, and good to go.

        :param x:   input data (batch_size, width, height, channels)
        :param y:   labels
        """

        # Set indices for train and validation
        x_idx = np.arange(x.shape[0])
        train_idx, val_idx = train_test_split(
            x_idx, random_state=self.config['random_seed'])

        # Train the model
        self.history = self.model.fit(
            x=normalize_image_data(x[train_idx]),
            y=y[train_idx],
            validation_data=(normalize_image_data(x[val_idx]), y[val_idx]),
            **self.config['fit_args'],
        ).history

        # Calculate metrics for the model
        if self.model_type == "classification":
            self.classification_metrics(x[val_idx], y[val_idx])
        elif self.model_type == "regression":
            self.regression_metrics(x[val_idx], y[val_idx])

        # Store indices for training and validation in config output
        # Need conversion to list as numpy arrays aren't json serializable.
        # To make the indices in config more uniform in format, we treat
        # a non-kfold experiment like a 1-fold experiment.
        self.indices['fold_0'] = {
            'train_idx': train_idx.tolist(),
            'val_idx': val_idx.tolist(),
        }
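Taken together with the constructor call shown in Example #6, a minimal usage sketch for run could look like the following; the experiment name and the model, config, images and labels variables are placeholders, not values from the source.

# Minimal usage sketch (placeholder names; constructor arguments follow
# Example #6 below). run() splits and normalizes the data internally.
experiment = Experiment(model=model,
                        config=config,
                        model_type="classification",
                        experiment_name="example_run")
experiment.run(images, labels)
experiment.save()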
Example #2
    def run_kfold(self, x, y, f1_print=False):
        """ Train the model using kfold cross-validation.
        It is assumed that the input data is preprocessed,
        and good to go.

        :param x:   input data (batch_size, width, height, channels)
        :param y:   labels / targets
        :param f1_print: whether to print the F1-score after each fold.
        """

        # Store the training history for each fold
        results = {}

        # Create the KFold splitter
        kf = KFold(random_state=self.config['random_seed'],
                   **self.config['kfold_args'])

        original_model = tf.keras.models.clone_model(self.model)
        # Run k-fold cross-validation
        fold = 0  # Track which fold
        for train_idx, val_idx in kf.split(x, y):
            # Reinitialize model
            self.model = tf.keras.models.clone_model(original_model)
            self.model.compile(optimizer=Adam(
                learning_rate=self.config['compile_args']['adam_lr']),
                               loss=self.config['compile_args']['loss'],
                               metrics=self.config['compile_args']['metrics'])
            # Train model
            history = self.model.fit(
                x=normalize_image_data(x[train_idx]),
                y=y[train_idx],
                validation_data=(normalize_image_data(x[val_idx]), y[val_idx]),
                **self.config['fit_args'],
            ).history
            # Calculate metrics for the model
            if self.model_type == "classification":
                self.classification_metrics(x[val_idx], y[val_idx], fold)
            elif self.model_type == "regression":
                self.regression_metrics(x[val_idx], y[val_idx], fold)

            # Store train and val indices for the fold
            foldkey = 'fold_' + str(fold)
            self.indices[foldkey] = {
                'train_idx': train_idx.tolist(),
                'val_idx': val_idx.tolist(),
            }
            # Store the history object
            results[foldkey] = history

            if f1_print:
                print("\n", foldkey, " F1-score: ",
                      self.metrics_kfold[foldkey]['f1_score'])

            fold += 1
        self.history_kfold = results
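The splitter is configured entirely through config['kfold_args']. Note that scikit-learn's KFold only accepts a random_state when shuffle=True, so a working entry could look like this (the values are illustrative, not taken from the source):

# Illustrative kfold_args; shuffle=True is required for KFold to accept
# the random_state passed in run_kfold.
config['kfold_args'] = {
    'n_splits': 5,
    'shuffle': True,
}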
Example #3
    def classification_metrics(self, x_val, y_val, fold=None):
        """ Calculates f1_score, matthews_corrcoef, confusion matrix and
        roc area under curve, accuracy metrics and stores them in the
        metrics attribute.
        The values are calculated based on the validation data.

        Recall that the default positive class for f1_score is 1
        """

        # Get prediction and make class labels based on threshold of 0.5
        y_out = self.model.predict(normalize_image_data(x_val))
        y_pred = y_out > 0.5
        confmat = confusion_matrix(y_val, y_pred)

        metrics = {}
        metrics['accuracy_score'] = accuracy_score(y_val, y_pred)
        metrics['confusion_matrix'] = {
            'TN': int(confmat[0, 0]),
            'FP': int(confmat[0, 1]),
            'FN': int(confmat[1, 0]),
            'TP': int(confmat[1, 1]),
        }
        metrics['f1_score'] = f1_score(y_val, y_pred)
        metrics['matthews_corrcoef'] = matthews_corrcoef(y_val, y_pred)
        metrics['roc_auc_score'] = roc_auc_score(y_val, y_out)

        if fold is not None:
            foldkey = 'fold_' + str(fold)
            self.metrics_kfold[foldkey] = metrics
        else:
            self.metrics = metrics
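The TN/FP/FN/TP indexing relies on scikit-learn's convention that confusion-matrix rows are true labels and columns are predictions. An equivalent, slightly more compact form (a sketch, not from the source) is:

# Equivalent unpacking: confusion_matrix returns [[TN, FP], [FN, TP]]
# for binary labels {0, 1}.
tn, fp, fn, tp = confusion_matrix(y_val, y_pred).ravel()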
Example #4
    def regression_metrics(self, x_val, y_val, fold=None):
        """ Calculates regression metrics on the validation data.
        """

        # Get predictions for the validation data
        y_pred = self.model.predict(normalize_image_data(x_val))

        metrics = {}
        metrics['r2_score'] = r2_score(y_val, y_pred)
        metrics['mse'] = mean_squared_error(y_val, y_pred)
        metrics['rmse'] = mean_squared_error(y_val, y_pred, squared=False)
        metrics['mae'] = mean_absolute_error(y_val, y_pred)
        if fold is not None:
            foldkey = 'fold_' + str(fold)
            self.metrics_kfold[foldkey] = metrics
        else:
            self.metrics = metrics
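Note that the squared keyword of mean_squared_error is deprecated in recent scikit-learn releases; an equivalent RMSE computation on those versions would be the following sketch (assuming scikit-learn >= 1.4):

# Equivalent RMSE on scikit-learn >= 1.4, where squared=False is deprecated.
from sklearn.metrics import root_mean_squared_error
metrics['rmse'] = root_mean_squared_error(y_val, y_pred)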
    'data': "200k",
}

# ================== Import Data ==================
DATA_PATH = get_git_root() + "data/simulated/"
images = np.load(DATA_PATH + f"images_{config['data']}.npy")
images = images.reshape(images.shape[0], 16, 16, 1)
positions = np.load(DATA_PATH + "positions_200k.npy")
labels = np.load(DATA_PATH + "labels_200k.npy")

single_indices, double_indices, close_indices = event_indices(positions)
# Split the double-event indices into train/val; the second copy of the
# argument only pads the output, so non1 and non2 are discarded.
train_idx, val_idx, non1, non2 = train_test_split(
    double_indices, double_indices, random_state=config['random_seed'])
# log-scale the images if desirable
config['scaling'] = "minmax"
# set tf random seed
tf.random.set_seed(config['random_seed'])
with tf.device(get_tf_device(20)):
    reg = ak.ImageRegressor(
        overwrite=True,
        max_trials=100,
    )
    # Feed the image regressor with training data.
    reg.fit(
        normalize_image_data(images[train_idx]),
        normalize_position_data(positions[train_idx]),
        validation_data=(normalize_image_data(images[val_idx]),
                         normalize_position_data(positions[val_idx])),
        epochs=10,
    )
    predicted_y = reg.predict(normalize_image_data(images[val_idx]))
    print(reg.evaluate(normalize_image_data(images[val_idx]),
                       normalize_position_data(positions[val_idx])))
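After the search finishes, the best pipeline found by AutoKeras can be exported as a plain Keras model for later reuse; the snippet itself stops at evaluation, so the following is only a sketch and the output filename is a placeholder.

    # Export the best model found by the search as a regular Keras model
    # and save it to disk (filename is illustrative).
    best_model = reg.export_model()
    best_model.save("autokeras_position_regressor.h5")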
Example #6
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        # Run experiment
        experiment = Experiment(model=model,
                                config=config,
                                model_type="classification",
                                experiment_name=search_name)
        experiment.run(
            normalize_image_data(images[train_idx]),
            labels[train_idx],
            normalize_image_data(images[val_idx]),
            labels[val_idx],
        )
        experiment.save()
        id_param[experiment.id] = {
            'batch_size': b_size,
        }
search_path = get_git_root() + "experiments/searches/"
with open(search_path + search_name + ".json", "w") as fp:
    json.dump(id_param, fp, indent=2)
Example #7
    'ENERGY_MODEL': "7eb0ab25bf53.h5",
    'POSITIONS_MODEL': "7eb0ab25bf53.h5",
}

# Note: this is not "real data"; it is a test to sanity-check the models.
#NPFILE = "../../data/simulated/CeBr10k_1.npy"

NPFILE = "anodedata_500k.npy"

print(config['DATA_PATH'] + NPFILE)

events, images = import_real_data(
    config['DATA_PATH'] + config['DATA_FILENAME'])  # Images not normalized
#images = np.load(config['DATA_PATH'] + NPFILE)
images = images.reshape(images.shape[0], 16, 16, 1)
images = normalize_image_data(images)  # Normalize images

descriptors = list(
    set([event['event_descriptor'] for event in events.values()]))

# Load models
model = tf.keras.models.load_model(config['MODEL_PATH'] + config['CLASSIFIER'])

# Classify events
prediction = model.predict(images)
event_classification = (prediction > 0.5).astype(int)
for event_id in events.keys():
    if event_classification[events[event_id]['image_idx']] == 0:
        events[event_id]['event_class'] = "single"
    else:
        events[event_id]['event_class'] = "double"
Example #8
# import real data
config_real = {
    'DATA_PATH': "../../data/real/",
    'DATA_FILENAME': "anodedata_500k.txt",
    'MODEL_PATH': "../../models/",
    'RESULTS_PATH': "../../results/",
    'CLASSIFIER': "367e35da671b.h5",
    'ENERGY_MODEL': "2137bd6d101c.h5",
    'POSITIONS_MODEL': "337cafc233f7.h5",
}

events, images_real = import_real_data(
    config_real['DATA_PATH'] +
    config_real['DATA_FILENAME'])  # Images not normalized
images_real = images_real.reshape(images_real.shape[0], 16, 16, 1)
images_real = normalize_image_data(images_real)  # Normalize images

# log-scale the images if desirable
config['scaling'] = "minmax"
if "np.log" in config['scaling']:
    images = np.log1p(images)

# set tf random seed
tf.random.set_seed(config['random_seed'])
experiments = {}
with tf.device(get_tf_device(20)):
    models = {}
    # Logistic
    model = Sequential()
    model.add(InputLayer(input_shape=(256, )))
    model.add(Dense(1, activation='sigmoid'))
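The snippet is cut off before the logistic model is compiled; a compile call consistent with the classifier examples above (e.g. Example #6) would look like this sketch:

    # Compile the logistic baseline the same way as the CNN classifiers.
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])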
Example #9
for config in configs:

    images = np.load(config['DATA_PATH'] + config['IMAGE_FILE'])
    labels = np.load(config['DATA_PATH'] + config['LABEL_FILE'])
    #print("DEBUG: ", labels)
    print("DEBUG: ")
    print("CLASSIFIER: ", config['CLASSIFIER'])
    print("IMAGE_FILE: ", config['IMAGE_FILE'])

    if config['ML_METHOD'] == 'CNN':
        images = images.reshape(images.shape[0], 16, 16, 1)
    else:
        images = images.reshape(images.shape[0], 256)

    model = tf.keras.models.load_model(config['MODEL_PATH'] + config['CLASSIFIER'])

    pred = model.predict(normalize_image_data(images))

    result = pred > 0.5

    accuracy = accuracy_score(labels, result)
    confmat = confusion_matrix(labels, result)
    f1 = f1_score(labels, result)
    mcc = matthews_corrcoef(labels, result)

    print("Model:", config['NAME'])
    print("Confusion matrix:\n", confmat)
    print("Accuracy:", accuracy)
    print("F1-score:", f1)
    print("MCC:", mcc)
Example #10
    prediction_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer='adam',
        loss='mse',
    )
    prediction_model.compile(
        optimizer='adam',
        loss='mse',
    )
    print(model.summary())

    # Run experiment
    experiment = Experiment(model=model,
                            config=config,
                            model_type="regression",
                            experiment_name=search_name)
    experiment.run(
        normalize_image_data(images[single_indices]),
        normalize_position_data(positions[single_indices])[:, :2],
    )
    experiment.save()
    mpath = experiment.config['path_args']['models'] + experiment.id + ".h5"
    prediction_model.save(mpath)
    heatmaps, coords = prediction_model.predict(
        images[single_indices][experiment.indices['fold_0']['val_idx']])
    np.save("dsnt_heatmaps_pred.npy", heatmaps)
    np.save("dsnt_coords_pred.npy", coords)
    print("Finished experiment.")
    print("Name:", search_name)
    print("id:", experiment.id)
Example #11
        Conv2D(32,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=(16, 16, 1),
               padding=padding))
    model.add(Conv2D(64, (3, 3), activation='relu', padding=padding))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu', padding=padding))
    model.add(Conv2D(64, (3, 3), activation='relu', padding=padding))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(4, activation='linear'))
    model.compile(
        loss='mse',
        optimizer='adam',
    )
    print(model.summary())

    # Run experiment
    experiment = Experiment(model=model,
                            config=config,
                            model_type="regression",
                            experiment_name=search_name)
    experiment.run(
        normalize_image_data(images[double_indices]),
        normalize_position_data(positions[double_indices]),
    )
    experiment.save()
    mpath = experiment.config['path_args']['models'] + experiment.id + ".h5"
    model.save(mpath)
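For completeness, the saved model can later be reloaded and evaluated on the held-out fold the same way the classifier scripts load theirs; a sketch using the indices stored by Experiment.run:

    # Reload the saved regression model and predict double-event positions
    # on the validation indices recorded for fold_0.
    reloaded = tf.keras.models.load_model(mpath)
    val_idx = experiment.indices['fold_0']['val_idx']
    predicted_positions = reloaded.predict(
        normalize_image_data(images[double_indices][val_idx]))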