def evaluation(x, y):
    """Evaluate predictions on (x, y) and report binary accuracy, micro F1 and macro F1."""
    y = tf.cast(y, dtype=tf.float32)
    y_pred = predict(x)
    predict_accuracy = tf.keras.metrics.BinaryAccuracy(name='predict_accuracy')
    acc = predict_accuracy(y, y_pred)
    mi_f1 = micro_f1(y, y_pred)
    ma_f1 = macro_f1(y, y_pred)
    print("val accuracy {:.4f}, micro f1 {:.4f}, macro f1 {:.4f}".format(
        acc.numpy(), mi_f1.numpy(), ma_f1.numpy()))
    return acc, mi_f1, ma_f1

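# micro_f1 and macro_f1 are used throughout this code but defined elsewhere.
# A minimal sketch of what they are assumed to compute for multi-label targets
# (per-label decisions at a 0.5 threshold); the project's real helpers may
# differ in threshold, smoothing, or differentiability.
import tensorflow as tf

def micro_f1_sketch(y_true, y_pred, threshold=0.5, eps=1e-8):
    """Micro-averaged F1: pool TP/FP/FN over all labels, then compute one F1."""
    y_true = tf.cast(y_true, tf.float32)
    y_hat = tf.cast(y_pred > threshold, tf.float32)
    tp = tf.reduce_sum(y_hat * y_true)
    fp = tf.reduce_sum(y_hat * (1.0 - y_true))
    fn = tf.reduce_sum((1.0 - y_hat) * y_true)
    return 2.0 * tp / (2.0 * tp + fp + fn + eps)

def macro_f1_sketch(y_true, y_pred, threshold=0.5, eps=1e-8):
    """Macro-averaged F1: compute F1 per label, then average across labels."""
    y_true = tf.cast(y_true, tf.float32)
    y_hat = tf.cast(y_pred > threshold, tf.float32)
    tp = tf.reduce_sum(y_hat * y_true, axis=0)
    fp = tf.reduce_sum(y_hat * (1.0 - y_true), axis=0)
    fn = tf.reduce_sum((1.0 - y_hat) * y_true, axis=0)
    f1_per_label = 2.0 * tp / (2.0 * tp + fp + fn + eps)
    return tf.reduce_mean(f1_per_label)
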
def test(model, x_test, y_test):
    print('Start Testing......')
    y_pred = model.predict(x_test)
    y_pred = tf.constant(y_pred, tf.float32)
    y_test = tf.constant(y_test, tf.float32)
    print(micro_f1(y_test, y_pred))
    print(macro_f1(y_test, y_pred))
    # Note: classification_report expects discrete labels; threshold y_pred
    # first if the model outputs probabilities.
    print(classification_report(y_test, y_pred))

def on_epoch_end(self, epoch, logs=None):
    logs = logs if logs is not None else {}

    # Metrics on the training set
    train_predict = np.asarray(self.model.predict(self.train_x))
    train_predictions_indices = [
        example_pred_probs.tolist().index(max(example_pred_probs))
        for example_pred_probs in train_predict
    ]
    train_predictions = [
        self.labels[prediction] for prediction in train_predictions_indices
    ]
    train_targets = [self.labels[target] for target in self.train_y]
    logs['macro_f1'] = macro_f1(train_targets, train_predictions)
    logs['macro_recall'] = macro_recall(train_targets, train_predictions)
    logs['mae'] = mae(train_targets, train_predictions)
    logs['macro_averaged_mae'] = macro_averaged_mae(
        train_targets, train_predictions)

    # Metrics on the validation set
    val_data = self.validation_data[:self.num_inputs]
    val_predict = np.asarray(self.model.predict(val_data))
    predictions_indices = [
        example_pred_probs.tolist().index(max(example_pred_probs))
        for example_pred_probs in val_predict
    ]
    predictions = [
        self.labels[prediction] for prediction in predictions_indices
    ]
    val_targ = flatten(self.validation_data[self.num_inputs])
    targets = [self.labels[target] for target in val_targ]
    logs['val_macro_f1'] = macro_f1(targets, predictions)
    logs['val_macro_recall'] = macro_recall(targets, predictions)
    logs['val_mae'] = mae(targets, predictions)
    logs['val_macro_averaged_mae'] = macro_averaged_mae(targets, predictions)

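# macro_recall, mae and macro_averaged_mae are project helpers that are not
# shown here. A minimal sketch of what macro_averaged_mae is assumed to
# compute, under the assumption that the mapped labels are ordinal numbers:
# the MAE is taken per true class and then averaged, so rare classes weigh as
# much as frequent ones.
import numpy as np

def macro_averaged_mae_sketch(targets, predictions):
    """Per-class mean absolute error, averaged over the classes present."""
    targets = np.asarray(targets, dtype=float)
    predictions = np.asarray(predictions, dtype=float)
    per_class_mae = [
        np.mean(np.abs(predictions[targets == c] - c))
        for c in np.unique(targets)
    ]
    return float(np.mean(per_class_mae))
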
def train_step(x, y):
    enc_padding_mask = create_padding_mask(x)
    with tf.GradientTape() as tape:
        y_pred = model(x, training=True, enc_padding_mask=enc_padding_mask)
        loss = loss_object(y, y_pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_accuracy(y, y_pred)
    mi_f1 = micro_f1(y, y_pred)
    ma_f1 = macro_f1(y, y_pred)
    return mi_f1, ma_f1, y_pred

def evaluate(test_dataset):
    predictions = []
    tars = []
    for (batch, (inp, tar)) in tqdm(enumerate(test_dataset)):
        enc_padding_mask = create_padding_mask(inp)
        predict = transformer(inp, False, enc_padding_mask=enc_padding_mask)
        predictions.append(predict)
        tars.append(tar)
    predictions = tf.concat(predictions, axis=0)
    tars = tf.concat(tars, axis=0)
    mi_f1 = micro_f1(tars, predictions)
    ma_f1 = macro_f1(tars, predictions)
    # Binarize at 0.5 before computing the sample-averaged F1 with sklearn.
    predictions = np.where(predictions > 0.5, 1, 0)
    tars = np.where(tars > 0.5, 1, 0)
    sample_f1 = f1_score(tars, predictions, average='samples')
    return mi_f1, ma_f1, sample_f1, tars, predictions

def train_step(inp, tar):
    enc_padding_mask = create_padding_mask(inp)
    with tf.GradientTape() as tape:
        predictions = transformer(inp, training=True,
                                  enc_padding_mask=enc_padding_mask)
        loss = loss_function(tar, predictions)
    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
    train_loss(loss)
    train_accuracy(tar, predictions)
    mi_f1 = micro_f1(tar, predictions)
    ma_f1 = macro_f1(tar, predictions)
    return mi_f1, ma_f1

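# create_padding_mask is used by the train and evaluation steps but defined
# elsewhere. A minimal sketch following the standard Transformer recipe
# (mask positions whose token id is 0); the exact mask shape expected by
# `transformer` is an assumption.
import tensorflow as tf

def create_padding_mask_sketch(seq):
    """Return 1.0 where `seq` is padding (token id 0) so attention can ignore it."""
    mask = tf.cast(tf.math.equal(seq, 0), tf.float32)
    # Broadcastable against (batch, num_heads, seq_len, seq_len) attention logits.
    return mask[:, tf.newaxis, tf.newaxis, :]
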
def metric(self, logit, truth, device='gpu'):
    """Define metrics for evaluation, especially for early stopping."""
    return macro_f1(logit, truth, device=device)

def f(split):
    print("Current split: ", split)
    standardize = True
    scatter_type = str(sys.argv[2])
    train_dir = "/home/laura/MedleyDB/processed/" + scatter_type + "/train"
    val_dir = "/home/laura/MedleyDB/processed/" + scatter_type + "/val"
    test_dir = "/home/laura/MedleyDB/processed/" + scatter_type + "/test"

    if "reduced" in scatter_type.split("_"):
        print("5 instruments")
        classes_num = 5
    else:
        print("16 instruments")
        classes_num = 16

    # Build train/val/test datasets for either log-mel or scattering features.
    if "logmel" in scatter_type.split("_"):
        print("Logmel data")
        if standardize:
            print("Standardization")
            mean_logmel, var_logmel = get_mean_var(train_dir, split)
        else:
            print("No standardization")
            mean_logmel, var_logmel = [None, None]
        time_steps = 827
        freq_bins = 64
        train_dataset = medleyDataset_logmel(train_dir, time_steps, freq_bins,
                                             classes_num, mean_logmel,
                                             var_logmel, split)
        val_dataset = medleyDataset_logmel(val_dir, time_steps, freq_bins,
                                           classes_num, mean_logmel,
                                           var_logmel, 1)
        test_dataset = medleyDataset_logmel(test_dir, time_steps, freq_bins,
                                            classes_num, mean_logmel,
                                            var_logmel, 1)
    else:
        print("Scatter data")
        if "9_8_132300" in scatter_type.split("_reduced"):
            input_length = 259
            order1_length = 62
            order2_length = 237
        elif "6_8_33075" in scatter_type.split("_reduced"):
            input_length = 517
            order1_length = 38
            order2_length = 87
        if standardize:
            print("Standardization")
            mean_order1, var_order1, mean_order2, var_order2 = get_mean_var(
                train_dir, split)
        else:
            print("No standardization")
            mean_order1, var_order1, mean_order2, var_order2 = [None, None,
                                                                None, None]
        train_dataset = medleyDataset(train_dir, input_length, order1_length,
                                      order2_length, classes_num, mean_order1,
                                      var_order1, mean_order2, var_order2,
                                      split)
        val_dataset = medleyDataset(val_dir, input_length, order1_length,
                                    order2_length, classes_num, mean_order1,
                                    var_order1, mean_order2, var_order2, 1)
        test_dataset = medleyDataset(test_dir, input_length, order1_length,
                                     order2_length, classes_num, mean_order1,
                                     var_order1, mean_order2, var_order2, 1)

    # Data loaders
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=64,
                                               num_workers=os.cpu_count(),
                                               shuffle=True,
                                               drop_last=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=os.cpu_count(),
                                             shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              num_workers=os.cpu_count(),
                                              shuffle=True)

    # Model
    if "logmel" in scatter_type.split("_"):
        print("Creating Cnn6 model")
        model = Cnn6(classes_num=classes_num, time_steps=time_steps,
                     freq_bins=freq_bins, spec_aug=False)
    else:
        print("Creating CNN_two model")
        model = CNN_two(classes_num=classes_num, input_length=input_length,
                        order1_length=order1_length,
                        order2_length=order2_length)
    model.cuda()

    loss_func = nn.BCELoss()
    optimizer = optim.Adam(model.parameters())

    losses_train = []
    losses_val = []
    f1_val = []
    epoch = 0
    weight_updates = 0
    threshold = 0.5
    directory_save = ("/home/laura/thesis/two_inputs/models/" +
                      str(sys.argv[1]) + "/" + scatter_type + "/" +
                      str(round(split * 100)))
    max_epoch = 100
    early_stop = False
    plot = False
    dropout = True

    # Training loop with loss-based early stopping
    while not early_stop:
        running_loss = 0.0
        is_best = False
        for batch_data_dict in train_loader:
            if "logmel" in scatter_type.split("_"):
                batch_input = batch_data_dict["logmel"].cuda()
                batch_target = batch_data_dict["target"].cuda()
                batch_output_dict = model(batch_input, dropout)
            else:
                batch_input1 = batch_data_dict["order1"].cuda()
                batch_input2 = batch_data_dict["order2"].cuda()
                batch_target = batch_data_dict["target"].cuda()
                batch_output_dict = model(batch_input1, batch_input2, plot,
                                          dropout)
batch_target_dict = {"target": batch_target} loss = loss_func(batch_output_dict["clipwise_output"], batch_target_dict["target"]) # Backward loss.backward() running_loss += loss.item() * train_loader.batch_size optimizer.step() weight_updates += 1 optimizer.zero_grad() epoch_loss = running_loss / len(train_dataset) losses_train.append(epoch_loss) plot = False model.eval() val_loss, batches_target, batches_pred = evaluate( model, val_loader, scatter_type, loss_func, threshold) losses_val.append(val_loss) if epoch > max_epoch and losses_val[-1] > min(losses_val): early_stop = True f1_score = macro_f1(batches_target, batches_pred) f1_val.append(f1_score) if min(losses_val) == val_loss: f1_instr_val = instrument_f1(batches_target, batches_pred) losses_test, batches_target_test, batches_pred_test = evaluate( model, test_loader, scatter_type, loss_func, threshold) f1_instr_test = instrument_f1(batches_target_test, batches_pred_test) is_best = True save_checkpoint( { "epoch": epoch + 1, "weight_updates": weight_updates, "state_dict": model.state_dict(), "train_losses": losses_train, "val_losses": losses_val, "test_loss": losses_test, "f1_instr_val": f1_instr_val, "f1_instr_test": f1_instr_test, "macro_f1_val": f1_val[np.argmin(losses_val)], "split": split, "optimizer": optimizer.state_dict(), }, is_best, directory_save, ) model.train() epoch += 1
def metric(self, logit, truth):
    """Define metrics for evaluation, especially for early stopping."""
    if self.training and self.aux_logits:
        # With auxiliary classifiers enabled, score only the main output.
        logit = logit[0]
    return macro_f1(logit, truth)

def predict(inp, tar, enc_padding_mask):
    predictions = transformer(inp, False, enc_padding_mask=enc_padding_mask)
    mi_f1 = micro_f1(tar, predictions)
    ma_f1 = macro_f1(tar, predictions)
    return mi_f1, ma_f1