def run_batch_validation2(model, weights, batchDir, dataDir, plotDir, batch_size):
    print("------------------STARTING VALIDATION--------------------")
    model.load_weights(weights)

    # load the batches used to train and validate
    val_e_file_batches = np.load(batchDir + 'e_files_valBatches.npy', allow_pickle=True)
    val_e_event_batches = np.load(batchDir + 'e_events_valBatches.npy', allow_pickle=True)
    val_bkg_file_batches = np.load(batchDir + 'bkg_files_valBatches.npy', allow_pickle=True)
    val_bkg_event_batches = np.load(batchDir + 'bkg_events_valBatches.npy', allow_pickle=True)

    print("Define Generator")
    val_generator = generator(val_e_file_batches, val_bkg_file_batches,
                              val_e_event_batches, val_bkg_event_batches,
                              batch_size, dataDir, True, False, True)
    print("Reset Generator")
    val_generator.reset()

    print("Get Predictions")
    predictions = model.predict(val_generator, verbose=2)
    true = val_generator.get_y_batches()

    print("Get Indices of Events")
    indices = val_generator.get_indices_batches()

    # 2x2 confusion matrix: rows index the predicted class, columns the true class
    cm = np.zeros((2, 2))
    for t, pred, index in zip(true, predictions, indices):
        if pred[1] > 0.5:
            if t[1] > 0.5:
                cm[1][1] += 1
            else:
                cm[1][0] += 1
        else:
            if t[1] > 0.5:
                cm[0][1] += 1
            else:
                cm[0][0] += 1
    print(cm)

    utils.metrics(true[:, 1], predictions[:, 1], plotDir, threshold=0.5)

    print()
    print(utils.bcolors.GREEN + "Saved metrics to " + plotDir + utils.bcolors.ENDC)
    print()

    np.savez_compressed(batchDir + "validation_outputs",
                        truth=true,
                        predicted=predictions,
                        indices=indices)
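# A minimal sketch of how the saved outputs above could be reloaded to
# rebuild the same 2x2 confusion matrix; the .npz name and the 0.5
# threshold mirror run_batch_validation2, the default path is an assumption.
def rebuild_confusion_matrix(npz_path="batches/validation_outputs.npz"):
    outputs = np.load(npz_path, allow_pickle=True)
    pred_pos = outputs["predicted"][:, 1] > 0.5
    true_pos = outputs["truth"][:, 1] > 0.5
    cm = np.zeros((2, 2))
    for p, t in zip(pred_pos, true_pos):
        cm[int(p)][int(t)] += 1  # rows: predicted class, columns: true class
    return cm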
def test():
    results = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1': [],
        'specificity': []
    }
    for idx in range(len(test_dataset)):
        # Load data
        sample = test_dataset[idx]
        images = sample['images'].to(device)
        labels = sample['labels'].to(device)

        # Predict on batch of images
        predictions = model(images)

        # Track batch results
        m = utils.metrics(predictions, labels)
        for key in m.keys():
            results[key].append(m[key])

    # Get the average over all batches
    for key in results.keys():
        results[key] = np.mean(results[key])

    w = csv.writer(open(os.path.join(args.results_path, "results.csv"), "w"))
    for key, val in results.items():
        w.writerow([key, val])
def train_epoch(model, iterator, optimizer, criterion, device, file_log=None):
    epoch_loss = 0
    model.train()
    count_step = 0
    truth_list = np.array([])
    predict_list = np.array([])
    for batch_inputs, batch_outputs in tqdm(iterator):
        input_tensor = torch.tensor(batch_inputs, dtype=torch.float32).to(device)
        y_true = torch.tensor(batch_outputs).to(device)
        optimizer.zero_grad()
        predictions = model(input_tensor).squeeze(1)
        loss = criterion(predictions, y_true)
        loss.backward()
        optimizer.step()
        y_preds = torch.argmax(predictions, dim=-1)
        truth_list = np.concatenate([truth_list, y_true.cpu().numpy()], axis=0)
        predict_list = np.concatenate([predict_list, y_preds.cpu().numpy()], axis=0)
        epoch_loss += loss.item()
        count_step += 1
    f1, acc = utils.metrics(predict_list.astype('int16'),
                            truth_list.astype('int16'),
                            print_report=True, file_log=file_log)
    return epoch_loss / count_step, acc, f1
def single_run_test(hyperparams):
    img, gt = load_and_update(hyperparams)
    ofname = get_checkpoint_filename(hyperparams)[:-4]
    ofname += "_" + hyperparams['dataset']
    prediction, hyperparams = model_prediction(img, ofname, hyperparams)
    if hyperparams['sampling_mode'] == 'fixed':
        gt = get_fixed_sets(hyperparams['run'], hyperparams['sample_path'],
                            hyperparams['dataset'], mode='test')
    run_results = metrics(prediction, gt, hyperparams['ignored_labels'],
                          hyperparams['n_classes'])
    path = ('{rdir}/prediction_training_{train_dataset}_test_{dataset}'
            '_epoch_{epoch}_batch_{batch_size}').format(**hyperparams)
    os.makedirs(path, exist_ok=True)
    show_results(run_results, None, hyperparams['model'], hyperparams['dataset'],
                 path, hyperparams['preprocessing']["type"],
                 label_values=hyperparams['label_values'],
                 training_image=hyperparams['train_dataset'],
                 agregated=False)
    plot_names = {
        'path': path,
        'checkpoint': get_checkpoint_filename(hyperparams),
        'dataset': hyperparams['dataset'],
        'ignored': hyperparams['ignored_labels']
    }
    create_plots(hyperparams['multi_class'], prediction, gt, plot_names)
def evaluate(self, goldp=None, silverp=None, gold_data=None, silver_data=None,
             print_score=True):
    """
    * Compares two syllabified lists in string format (e.g. ser-uaes):
        gold = ground truth
        silver = as predicted by system
    * Both lists can be passed as lists (`gold_data`, `silver_data`)
      or can be loaded from files (`goldp`, `silverp`).
    * Will return the token-level accuracy and hyphenation accuracy of the
      silver predictions (will print these if `print_score` is True).
    """
    if goldp:
        gold_data = utils.load_data(goldp)
    if silverp:
        silver_data = utils.load_data(silverp)

    _, gold_Y = self.vectorize(gold_data)
    _, silver_Y = self.vectorize(silver_data)

    token_acc, hyphen_acc = utils.metrics(utils.pred_to_classes(gold_Y),
                                          utils.pred_to_classes(silver_Y))

    if print_score:
        print('\t- evaluation scores:')
        print('\t\t + token acc:', round(token_acc, 2))
        print('\t\t + hyphen acc:', round(hyphen_acc, 2))

    return token_acc, hyphen_acc
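# Hypothetical usage of evaluate(), assuming `syllabifier` is an already
# fitted instance and that the gold/silver files hold one hyphenated token
# per line (e.g. "ser-uaes"); both paths are illustrative only.
token_acc, hyphen_acc = syllabifier.evaluate(goldp='data/gold.txt',
                                             silverp='out/silver.txt',
                                             print_score=True)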
def predict(siamese_model, data, instance_ids, outpath, is_submission=False):
    """
    Make predictions with a trained Siamese model.

    Args:
        siamese_model: a trained Keras model
        data: numpy arrays that hold the train data
        instance_ids: identifiers for each instance
        outpath: output file path
        is_submission: a flag to indicate the data is for submission

    Returns:
        None
    """
    y_pred = siamese_model.predict(data[:-1], verbose=0)
    if not is_submission:
        y = data[-1]
        df_pred = utils.create_prediction_df(y, y_pred, instance_ids)
        df_pred.to_csv(outpath, index=False)
        df_metrics, conf = utils.metrics(y, y_pred)
        print(df_metrics)
        df_metrics.to_csv(outpath[:-4] + "_metrics.csv", index=False)
    else:
        # prediction for a submission
        df_id = pd.DataFrame(instance_ids, columns=["test_id"])
        df_proba = pd.DataFrame(y_pred, columns=["is_duplicate"])
        df_pred = pd.concat([df_id, df_proba], axis=1)
        df_pred.to_csv(outpath, index=False)
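# Hypothetical call, assuming `data` packs the paired-question inputs with
# the labels as the last array, exactly as predict() unpacks them; the
# variable names and path are illustrative placeholders.
predict(siamese_model, [q1_dev, q2_dev, y_dev], dev_ids,
        outpath="preds/dev_predictions.csv", is_submission=False)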
def validation():
    model.eval()
    losses = []
    accuracy = []
    f1 = []
    specificity = []
    precision = []
    for idx in range(len(dataset_valid)):
        sample = dataset_valid[idx]

        # Load data to GPU
        images = sample['images'].to(device)
        labels = sample['labels'].to(device)

        # Forward pass
        predicted = model(images)

        # Loss
        loss = criterion(predicted, labels)
        losses.append(loss.data.item())

        # Metrics
        metrics = utils.metrics(predicted, labels)
        accuracy.append(metrics['accuracy'])
        f1.append(metrics['f1'])
        specificity.append(metrics['specificity'])
        precision.append(metrics['precision'])

    return torch.tensor(losses).mean(), torch.tensor(accuracy).mean(), \
        torch.tensor(f1).mean(), torch.tensor(specificity).mean(), \
        torch.tensor(precision).mean()
def calculate_metric(channels):
    """
    Calculates the evaluation metrics for the original and reconstructed images.

    :param channels: List of channels
    :return: None
    """
    folders = os.walk("../results")
    num_folders = 0
    SSIM = {channel: [] for channel in channels}
    PSNR = {channel: [] for channel in channels}
    next(folders)  # skip the root "../results" directory itself
    for folder in folders:
        num_folders += 1
        firstImage = io.imread(f"{folder[0]}/original_image.png")
        for channel in channels:
            secondImage = io.imread(f"{folder[0]}/image_{channel}.png")
            image_metrics = metrics(firstImage, secondImage)
            SSIM[channel].append(image_metrics["SSIM"])
            PSNR[channel].append(image_metrics["PSNR"])
    with open("../results/avg_ssim.txt", "w") as file:
        for channel in SSIM.keys():
            file.write(f"SSIM for {channel} Channel : "
                       f"{sum(SSIM[channel]) / len(SSIM[channel])}\n")
    with open("../results/avg_psnr.txt", "w") as file:
        for channel in PSNR.keys():
            file.write(f"PSNR for {channel} Channel : "
                       f"{sum(PSNR[channel]) / len(PSNR[channel])}\n")
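# One plausible implementation of the metrics() helper used above, based on
# scikit-image (channel_axis requires skimage >= 0.19); a sketch under the
# assumption that both images share the same shape, not the project's code.
from skimage.metrics import peak_signal_noise_ratio, structural_similarity

def metrics_sketch(first_image, second_image):
    return {
        "SSIM": structural_similarity(first_image, second_image, channel_axis=-1),
        "PSNR": peak_signal_noise_ratio(first_image, second_image),
    }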
def model_train(x_train, y_train, x_val, y_val, params):
    svc = SVC(kernel='rbf', verbose=True, **params)
    svc.fit(x_train, y_train)
    y_hat = svc.predict(x_val)
    f1 = metrics(y_val, y_hat)
    # negated so that a minimizer (e.g. a hyperparameter search) maximizes F1
    return -f1
def val(self):
    self.model.eval()
    tbar = tqdm(self.val_queue)
    for step, (input, target) in enumerate(tbar):
        input = input.to(device=self.device, dtype=torch.float32)
        target = target.to(device=self.device, dtype=torch.float32)
        pred = self.model(input)
        pred = torch.sigmoid(pred)
        self.loss = (.75 * self.criterion(pred, target) +
                     .25 * self.dice((pred > 0.5).float(), target))
        # self.dice = dice_coeff(pred, target.squeeze(dim=1))
        # pred = (pred > .5).float()
        # self.dice_score = 1 - self.dice(pred, target)
        self.val_loss_meter.update(self.loss.item(), input.size(0))

        ########### CAL METRIC ############
        SE, SPE, ACC, DICE = metrics(pred, target)
        self.val_accuracy.update(ACC, input.size(0))
        self.val_sensitivity.update(SE, input.size(0))
        self.val_specificity.update(SPE, input.size(0))
        self.val_dice.update(DICE, input.size(0))
        ###################################

        tbar.set_description('Val_Loss: {:.4f}; Val_Dice: {:.4f}'.format(
            self.val_loss_meter.mloss, self.val_dice.mloss))

    self.writer.add_images('Val/Images', input, self.epoch)
    self.writer.add_images('Val/Masks/True', target, self.epoch)
    self.writer.add_images('Val/Masks/pred', (pred > .5).float(), self.epoch)

    if self.val_dice.mloss > self.best_dice:
        self.best_dice = self.val_dice.mloss
        self.best_sen = self.val_sensitivity.mloss
        self.best_epoch = self.epoch
        ckpt_file_path = self.path + '/ckpt/best_weights.pth.tar'
        torch.save(
            {
                'epoch': self.epoch,
                'state_dict': self.model.state_dict(),
            }, ckpt_file_path)

    self.writer.add_scalar('Val/Loss', self.val_loss_meter.mloss, self.epoch)
    self.writer.add_scalar('Val/Dice', self.val_dice.mloss, self.epoch)
    self.writer.add_scalar('Val/Acc', self.val_accuracy.mloss, self.epoch)
    self.writer.add_scalar('Val/Sen', self.val_sensitivity.mloss, self.epoch)
    self.writer.add_scalar('Val/Spe', self.val_specificity.mloss, self.epoch)
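# Assumed semantics of the metrics(pred, target) call above: threshold the
# sigmoid output at 0.5 and derive sensitivity, specificity, accuracy and
# Dice from the binary masks. A sketch, not the repo's implementation.
def seg_metrics(pred, target, eps=1e-7):
    p = (pred > 0.5).float()
    tp = (p * target).sum()
    tn = ((1 - p) * (1 - target)).sum()
    fp = (p * (1 - target)).sum()
    fn = ((1 - p) * target).sum()
    se = tp / (tp + fn + eps)            # sensitivity (recall)
    spe = tn / (tn + fp + eps)           # specificity
    acc = (tp + tn) / (tp + tn + fp + fn + eps)
    dice = 2 * tp / (2 * tp + fp + fn + eps)
    return se, spe, acc, dice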
def _train(self, X, Y, print_after, plot):
    """Training function

    :param X: numpy array
        Training examples
    :param Y: numpy array
        Target values
    :param print_after: int
        Logs the loss and accuracy after this many iterations
    :param plot: bool
        Indicates whether to draw the plots or not
    :returns cost: float
        Error value after training the model
    :returns accuracy_to_plot: list<float>
        List of accuracies at each iteration of the model
    :returns errors_to_plot: list<float>
        List of errors at each iteration of the model
    """
    m, n = X.shape
    temp_error = []
    temp_accuracy = []
    for epoch in range(0, self.n_epoch + 1):
        output = self.sigmoid(np.dot(self.w, X.T) + self.b)
        pos_class_loss = Y * np.log(output)
        neg_class_loss = (1 - Y) * np.log(1 - output)
        cost = (1 / m) * np.sum(-pos_class_loss - neg_class_loss)
        dw = (1 / m) * np.dot(output - Y, X)
        db = (1 / m) * np.sum(output - Y)
        if self.reg == 'L1':
            cost += (self.beta / m) * np.sum(np.abs(self.w))
            dw += (self.beta / m) * np.sign(self.w)
        elif self.reg == 'L2':
            cost += (self.beta / m) * np.sum(np.square(self.w))
            dw += (self.beta / m) * self.w
        if plot:
            Y_pred = self.classify(X)
            accuracy = metrics(Y.reshape(-1), Y_pred.reshape(-1))['accuracy']
            temp_error.append(cost)
            temp_accuracy.append(accuracy)
        self.w -= dw * self.ieta
        self.b -= db * self.ieta
        if epoch % print_after == 0:
            print("\tEpoch {:4}/{} ---> {:.4f} | Accuracy: ---> {:.4f}".format(
                epoch, self.n_epoch, cost, self.score(Y, output)))
    return cost, temp_accuracy, temp_error
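# Standalone sanity check of the gradients used in _train: for the sigmoid
# output a = sigma(w.x + b) and the cross-entropy cost, dJ/dw = (1/m)(a - y).X
# and dJ/db = (1/m) sum(a - y). The data here is synthetic.
rng = np.random.default_rng(0)
X_check = rng.normal(size=(100, 3))
Y_check = (X_check[:, 0] > 0).astype(float)
w_check, b_check = np.zeros(3), 0.0
a = 1.0 / (1.0 + np.exp(-(X_check @ w_check + b_check)))
dw_check = (a - Y_check) @ X_check / len(Y_check)
db_check = np.mean(a - Y_check)
print(dw_check.shape, db_check)  # (3,) and a scalar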
def run_batch_validation(model, weights, batchDir, dataDir, plotDir):
    print("------------------STARTING VALIDATION--------------------")
    model.load_weights(weights)

    # load the batches used to train and validate
    val_e_file_batches = np.load(batchDir + 'e_files_valBatches.npy', allow_pickle=True)
    val_e_event_batches = np.load(batchDir + 'e_events_valBatches.npy', allow_pickle=True)
    val_bkg_file_batches = np.load(batchDir + 'bkg_files_valBatches.npy', allow_pickle=True)
    val_bkg_event_batches = np.load(batchDir + 'bkg_events_valBatches.npy', allow_pickle=True)

    file_batches = np.concatenate((val_e_file_batches, val_bkg_file_batches))
    event_batches = np.concatenate((val_e_event_batches, val_bkg_event_batches))
    class_labels = np.concatenate((['e'] * val_e_file_batches.shape[0],
                                   ['bkg'] * val_bkg_file_batches.shape[0]))

    predictions, infos, true = [], [], []
    for events, files, class_label in zip(event_batches, file_batches, class_labels):
        # load the infos before `events` is overwritten with the event data
        batch_infos = load_data(files, events, class_label + '_infos', dataDir)
        events = load_data(files, events, class_label, dataDir)
        events = events[:, 3:]
        events = np.reshape(events, (events.shape[0], 100, 4))
        preds = model.predict(events)
        predictions.append(preds)
        infos.append(batch_infos)
        # one-hot truth follows from the batch's class: [0, 1] for 'e', [1, 0] for 'bkg'
        label = [0, 1] if class_label == 'e' else [1, 0]
        true.append(np.tile(label, (preds.shape[0], 1)))

    predictions = np.concatenate(predictions)
    true = np.concatenate(true)

    utils.metrics(true[:, 1], predictions[:, 1], plotDir, threshold=0.5)

    print()
    print(utils.bcolors.GREEN + "Saved metrics to " + plotDir + utils.bcolors.ENDC)
    print()

    np.savez_compressed(batchDir + "validation_outputs",
                        truth=true,
                        predicted=predictions,
                        infos=infos)
def _inference(model, batch, all_items_list):
    model.eval()
    res = np.array([0.] * 6)
    with torch.no_grad():
        uids, h_items, h_attrs, t_iids = [x for x in batch]
        scores = model(h_items.to(device), h_attrs.to(device))
        for u in range(len(uids)):
            target = t_iids[u].numpy()
            leave_out_one_sample = random.sample(all_items_list, args.n_neg)
            if target not in leave_out_one_sample:
                leave_out_one_sample[0] = target
            scores_temp = scores[u].cpu().detach().numpy()
            item_score = [(j, scores_temp[j]) for j in leave_out_one_sample]
            item_score = sorted(item_score, key=lambda x: x[1], reverse=True)
            item_sort = [x[0] for x in item_score]
            res[0:3] += utils.metrics(item_sort[0:10], target)
            res[3:6] += utils.metrics(item_sort[0:20], target)
        res /= len(t_iids)
    return res
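# Minimal sketch of the sampled leave-one-out protocol used above: the
# target item is ranked against n_neg random candidates and hit@k is 1
# when it lands in the top k. utils.metrics is assumed to return such
# rank-based scores (e.g. hit rate, NDCG, MRR) for the given cutoff.
def hit_at_k(item_sort, target, k):
    return 1.0 if target in item_sort[:k] else 0.0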
def Q2():
    X = load_obj('X_Q2')
    y = load_obj('label_Q2')
    rf = RandomForestClassifier(max_features=50, random_state=20)
    svc = svm.LinearSVC(C=10)
    lr = LogisticRegression(random_state=20)
    knn = KNeighborsClassifier(n_neighbors=3)
    mlp = MLPClassifier(solver='lbfgs', activation="relu", alpha=1e-4,
                        hidden_layer_sizes=(200, 400), random_state=1)
    dt = DecisionTreeClassifier(random_state=20)
    clfs = [rf, svc, lr, knn, mlp, dt]
    clf_names = ['rf', 'svc', 'lr', 'knn', 'mlp', 'dt']
    for clf, clf_name in zip(clfs, clf_names):
        print(clf_name)
        score = True
        if clf_name == 'svc':
            score = False
        y_t_train, y_p_train, y_t_test, y_p_test, y_score_train, y_score_test = \
            cross_val(clf, X, y, shuffle=True, score=score, verbose=True)
        acc_test, rec_test, prec_test = metrics(y_t_test, y_p_test)
        acc_train, rec_train, prec_train = metrics(y_t_train, y_p_train)
        print('Test accuracy %0.4f, recall score %0.4f and precision score %.4f'
              % (acc_test, rec_test, prec_test))
        print('Train accuracy %0.4f, recall score %0.4f and precision score %.4f'
              % (acc_train, rec_train, prec_train))
        classnames = ['Washington', 'Massachusetts']
        plot_confusion_matrix(y_t_test, y_p_test, classnames)
        plot_ROC(y_t_test, y_score_test, no_score=(not score))
def test(model):
    model = model.to(device)
    number = 0
    running_loss = 0.0
    acc = 0.0
    H = 0
    S = 0
    common = 0
    containLink = 0
    linkNumber = 0
    model.eval()
    for i in range(len(test_list)):
        pred_span = []
        for j in range(len(test_list[i])):
            model.hidden = model.init_hidden()
            sentence_in = v.prepare_sequence(test_list[i][j]).to(device)
            labels = tag.prepare_sequence(test_labels[i][j], tag_to_indx).to(device)
            n = len(test_list[i][j])
            number += n
            output = model(sentence_in)
            loss = nn.functional.nll_loss(output, labels)
            _, pred = torch.max(output, dim=1)
            # print(pred.data)
            for indexs in convert(pred.data):
                pred_span.append([test_span_list[i][j][indexs[0]][0],
                                  test_span_list[i][j][indexs[1]][1]])
            acc += torch.sum(torch.eq(pred, labels).float()).data
            running_loss += n * loss.data
        S += len(pred_span)
        H += len(test_labels_span[i])
        common += metrics(pred_span, test_labels_span[i])
        tmpContainLink, tmpLinkNumber = linkMetrics(pred_span, test_linkSpans[i])
        containLink += tmpContainLink
        linkNumber += tmpLinkNumber
    print(S, H, common)
    if S != 0:
        precision = common / S
    else:
        precision = 0.0
    recall = common / H
    if common == 0:
        F1 = 0.0
    else:
        F1 = 2 * recall * precision / float(recall + precision)
    print(containLink, linkNumber)
    print('loss: %.4f , acc: %.4f , precision: %.4f, recall: %.4f, F1: %.4f, '
          'LinkRecall: %.4f Testing'
          % (running_loss / number, acc / number, precision, recall, F1,
             containLink / linkNumber))
    return running_loss / number, acc / number, precision, recall, F1, \
        containLink / linkNumber
def fit(self, path, print_after=1, plot=False):
    """Wrapper method for training and saving the model"""
    X_train, X_test, Y_train, Y_test = self._load(path)
    Y_train = Y_train.reshape((1, -1))
    Y_test = Y_test.reshape((1, -1))
    X_train = standardize(X_train)
    X_test = standardize(X_test)
    _, n_feature = X_train.shape
    accuracy_to_plot = []
    error_to_plot = []
    curr_best = -1
    for iter_ in range(self.n_init):
        print("Running Model {}".format(iter_ + 1))
        self._init_weight(n_feature)
        cost, accuracy, error = self._train(X_train, Y_train, print_after, plot)
        if iter_ == 0 or cost < curr_best:
            self._save()
            curr_best = cost
            accuracy_to_plot = accuracy
            error_to_plot = error
    print("Loading the best model ...")
    dict_ = self.load_state_dict()
    self.w = dict_['w']
    self.b = dict_['b']
    if plot:
        plt.figure(1)
        plt.plot(range(self.n_epoch + 1), error_to_plot, c='b')
        plt.xlabel('Number of Epochs')
        plt.ylabel('Logistic Loss')
        plt.title('Loss Function vs Epochs')
        plt.savefig('./regr_error_plot.png')
        plt.figure(2)
        plt.plot(range(self.n_epoch + 1), accuracy_to_plot, c='r')
        plt.xlabel('Number of Epochs')
        plt.ylabel('Accuracy %')
        plt.title('Accuracy vs Epochs')
        plt.savefig('./regr_accuracy_plot.png')
    Y_pred = self.classify(X_test)
    dict_ = metrics(Y_test.reshape(-1), Y_pred.reshape(-1))
    print("Validation Accuracy: {:4}".format(dict_['accuracy']))
    print("F-Score: {:4}".format(100 * dict_['f1-score']))
def main(args): """Main function of RL-LIM for synthetic data experiments. Args: args: data_name, data_no, seed, hyperparams, network parameters Returns: awd: Absolute Weight Difference """ # Inputs data_name = args.data_name data_no = args.data_no seed = args.seed hyperparam = args.hyperparam # Network parameters parameters = dict() parameters['hidden_dim'] = args.hidden_dim parameters['iterations'] = args.iterations parameters['layer_number'] = args.layer_number parameters['batch_size'] = args.batch_size parameters['batch_size_small'] = args.batch_size_small # Data loading train_x, train_y_hat, test_x, test_y_hat, test_c, test_idx = \ synthetic_data_loading(data_name, data_no, seed) print('Finish ' + str(data_name) + ' data loading') # Fits RL-LIM np.random.seed(seed) idx = np.random.permutation(len(train_y_hat)) train_idx = idx[:int(0.9 * len(train_y_hat))] valid_idx = idx[int(0.9 * len(train_y_hat)):] valid_x = train_x[valid_idx, :] valid_y_hat = train_y_hat[valid_idx] train_x = train_x[train_idx, :] train_y_hat = train_y_hat[train_idx] test_y_fit, test_coef = rllim(train_x, train_y_hat, valid_x, valid_y_hat, test_x, parameters, hyperparam) # Performance evaluation _, awd = metrics(test_y_hat, test_y_fit, test_coef, test_c, test_idx) print('AWD' + str(np.round(awd, 4))) return awd
def train(self):
    self.model.train()
    tbar = tqdm(self.train_queue)
    for step, (input, target) in enumerate(tbar):
        input = input.to(device=self.device, dtype=torch.float32)
        target = target.to(device=self.device, dtype=torch.float32)
        predicts = self.model(input)
        predicts_prob = torch.sigmoid(predicts)
        self.dice = DiceLoss()
        self.loss = (.75 * self.criterion(predicts_prob, target) +
                     .25 * self.dice((predicts_prob > 0.5).float(), target))
        self.train_loss_meter.update(self.loss.item(), input.size(0))
        self.model_optimizer.zero_grad()
        self.loss.backward()
        self.model_optimizer.step()

        ########### CAL METRIC ############
        SE, SPE, ACC, DICE = metrics(predicts_prob, target)
        self.train_accuracy.update(ACC, input.size(0))
        self.train_sensitivity.update(SE, input.size(0))
        self.train_specificity.update(SPE, input.size(0))
        self.tr_dice.update(DICE, input.size(0))
        ###################################

        tbar.set_description('loss: %.4f; dice: %.4f' %
                             (self.train_loss_meter.mloss, self.tr_dice.mloss))

    self.writer.add_images('Train/Images', input, self.epoch)
    self.writer.add_images('Train/Masks/True', target, self.epoch)
    self.writer.add_images('Train/Masks/pred', (predicts_prob > .5).float(), self.epoch)

    self.writer.add_scalar('Train/loss', self.train_loss_meter.mloss, self.epoch)
    self.writer.add_scalar('Train/Acc', self.train_accuracy.mloss, self.epoch)
    self.writer.add_scalar('Train/Sen', self.train_sensitivity.mloss, self.epoch)
    self.writer.add_scalar('Train/Spe', self.train_specificity.mloss, self.epoch)
    self.writer.add_scalar('Train/Dice', self.tr_dice.mloss, self.epoch)
def evaluate_model_performance(model, test_data):
    print("Predicting outputs...")
    predictions = model.predict(test_data[:, :-1])
    predictions = np.squeeze(predictions)
    ground_truth = test_data[:, -1]
    num_classes = np.unique(ground_truth).shape[0]

    print("Evaluating metrics...")
    confusion_mat = utils.confusion_matrix(predictions, ground_truth, num_classes)
    total_accuracy, precision, recall, f1_score = utils.metrics(
        predictions, ground_truth, num_classes)

    np.set_printoptions(precision=4, suppress=True)
    print("\nOverall accuracy = {}\n".format(total_accuracy))
    print("Confusion Matrix:\n{}\n".format(confusion_mat))
    print("Precision wrt each class:\n{}\n".format(precision))
    print("Recall wrt each class:\n{}\n".format(recall))
    print("F1-score wrt each class:\n{}\n".format(f1_score))
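# A minimal sketch of per-class precision/recall/F1 derived from a confusion
# matrix C with C[i, j] = count(true=i, predicted=j); this is an assumption
# about what utils.metrics computes, not its actual code.
def per_class_metrics(C):
    C = C.astype(float)
    tp = np.diag(C)
    precision = tp / np.maximum(C.sum(axis=0), 1)
    recall = tp / np.maximum(C.sum(axis=1), 1)
    f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-12)
    return precision, recall, f1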
def test(self):
    test_images = Angioectasias(self.abnormality, mode='test')
    self.test_queue = data.DataLoader(test_images, batch_size=1, drop_last=False)

    test_path = './' + self.abnormality + '/test/images'
    input_files = natsorted(os.listdir(test_path))
    save_path = './' + self.abnormality + '/' + self.d + '/pred/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    self.model.load_state_dict(
        torch.load('./' + self.abnormality + '/' + self.d +
                   '/ckpt/best_weights.pth.tar')['state_dict'])
    self.model.eval()

    self.test_dice = AverageMeter()
    self.test_accuracy = AverageMeter()
    self.test_sensitivity = AverageMeter()
    self.test_specificity = AverageMeter()

    with torch.no_grad():
        for k, (img, target) in enumerate(tqdm(self.test_queue)):
            img = img.to(self.device, dtype=torch.float32)
            out = self.model(img)
            out = torch.sigmoid(out)

            SE, SPE, ACC, DICE = metrics(out, target)
            self.test_accuracy.update(ACC, img.size(0))
            self.test_sensitivity.update(SE, img.size(0))
            self.test_specificity.update(SPE, img.size(0))
            self.test_dice.update(DICE, img.size(0))

            out = out[0].cpu().numpy()
            out = np.transpose(out, (1, 2, 0))
            out = out * 255
            out = out.astype('uint8')  # assign the result; astype() is not in-place
            cv2.imwrite(save_path + input_files[k], out)

    print('Acc: {:.4f}, Sen: {:.4f}, Spe: {:.4f}, Dice: {:.4f}'.format(
        self.test_accuracy.mloss, self.test_sensitivity.mloss,
        self.test_specificity.mloss, self.test_dice.mloss))
def evaluate(model, valid_dataloader, embed, args):
    with torch.no_grad():
        model.eval()
        losses, correct = 0, 0
        y_hats, targets = [], []
        for x, y in valid_dataloader:
            x, y = x.to(args.device), y.to(args.device)
            pred = model(x, args)
            loss = F.cross_entropy(pred, y)
            losses += loss.item()

            y_hat = torch.max(pred, 1)[1]
            y_hats += y_hat.tolist()
            targets += y.tolist()
            correct += (y_hat == y).sum().item()

    avg_loss, accuracy, precision, recall, f1, cm = metrics(
        valid_dataloader, losses, correct, y_hats, targets)
    return avg_loss, accuracy, precision, recall, f1, cm
def evaluate(config, model, criterion, validation_loader, method,
             test_flag=False, save_dir=None):
    losses = AverageMeter('Loss', ':.5f')
    conf_meter = ConfusionMeter(config['n_class'])
    with torch.no_grad():
        model.eval()
        for t, (inputs, labels, names) in enumerate(tqdm(validation_loader)):
            inputs, labels = inputs.cuda(), labels.cuda().long()
            if method == 'pixelnet':
                model.set_train_flag(False)

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # save predictions if needed
            predictions = outputs.cpu().argmax(1)
            if test_flag:
                for i in range(predictions.shape[0]):
                    plt.imsave('%s/%s.png' % (save_dir, names[i][:-4]),
                               predictions[i].squeeze(), cmap='gray')

            # measure accuracy, record loss
            losses.update(loss.item(), inputs.size(0))
            conf_meter.add(
                outputs.permute(0, 2, 3, 1).contiguous().view(-1, config['n_class']),
                labels.view(-1))

    if test_flag:
        print('--- evaluation result ---')
    else:
        print('--- validation result ---')
    conf_mat = conf_meter.value()
    acc, iou = metrics(conf_mat, verbose=test_flag)
    print('loss: %.5f, accuracy: %.5f, IU: %.5f' % (losses.avg, acc, iou))
    return losses.avg, acc, iou
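# Assumed behaviour of the metrics(conf_mat) call above: overall pixel
# accuracy and mean IoU computed from the accumulated confusion matrix.
# A sketch under that assumption, not the project's implementation.
def acc_mean_iou(conf_mat):
    conf_mat = conf_mat.astype(float)
    acc = np.diag(conf_mat).sum() / conf_mat.sum()
    union = conf_mat.sum(axis=0) + conf_mat.sum(axis=1) - np.diag(conf_mat)
    iou = np.diag(conf_mat) / np.maximum(union, 1)
    return acc, iou.mean()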
def evaluate(save_model):
    print()
    print("--------Evaluating DATE model---------")
    # load the best model
    best_model = torch.load(model_path)
    best_model.eval()

    # get threshold from the validation set
    y_prob, val_loss = best_model.module.eval_on_batch(valid_loader)
    best_threshold, val_score, roc = torch_threshold(y_prob, xgb_validy)

    # predict on the test set
    y_prob, val_loss = best_model.module.eval_on_batch(test_loader)
    overall_f1, auc, precisions, recalls, f1s, revenues = metrics(
        y_prob, xgb_testy, revenue_test, best_threshold)
    best_score = f1s[0]
    os.system("rm %s" % model_path)
    if save_model:
        scored_name = "./saved_models/%s_%.4f.pkl" % (model_name, overall_f1)
        torch.save(best_model, scored_name)

    return overall_f1, auc, precisions, recalls, f1s, revenues
def evaluate(save_model, exp_id):
    print()
    print("--------Evaluating DATE model---------")
    # load the best model
    best_model = torch.load(model_path)
    best_model.eval()

    # get threshold from the validation set
    y_prob, val_loss = best_model.eval_on_batch(valid_loader)
    best_threshold, val_score, roc = torch_threshold(y_prob, xgb_validy)

    # predict on the test set
    y_prob, val_loss = best_model.eval_on_batch(test_loader)
    overall_f1, auc, precisions, recalls, f1s, revenues = metrics(
        y_prob, xgb_testy, revenue_test, best_threshold)
    best_score = f1s[0]
    os.system("rm %s" % model_path)
    if save_model:
        scored_name = f'./saved_models/DATE_{starting_date}_{exp_id}_{round(overall_f1, 4)}.pkl'
        torch.save(best_model, scored_name)

    return overall_f1, auc, precisions, recalls, f1s, revenues
def retrain(args):
    # load dataset
    g_homo, g_list, pairs, labels, train_mask, val_mask, test_mask = u.load_data(
        args['name'], args['train_size'])

    # transfer to device
    pairs = t.from_numpy(pairs).to(args['device'])
    labels = t.from_numpy(labels).to(args['device'])
    train_mask = t.from_numpy(train_mask).to(args['device'])
    val_mask = t.from_numpy(val_mask).to(args['device'])
    test_mask = t.from_numpy(test_mask).to(args['device'])
    feat1 = t.randn(g_homo.number_of_nodes(), args['in_feats']).to(args['device'])
    feat2 = t.randn(g_list[0].number_of_nodes(), args['in_feats']).to(args['device'])
    labels = labels.view(-1, 1).to(dtype=t.float32)

    # model
    if args['model'] == 'SRG':
        model = m.SRG(rgcn_in_feats=args['in_feats'],
                      rgcn_out_feats=args['embedding_size'],
                      rgcn_num_blocks=args['num_b'],
                      rgcn_dropout=0.,
                      han_num_meta_path=args['num_meta_path'],
                      han_in_feats=args['in_feats'],
                      han_hidden_feats=args['embedding_size'],
                      han_head_list=args['head_list'],
                      han_dropout=args['drop_out'],
                      fc_hidden_feats=args['fc_units']).to(args['device'])
    elif args['model'] == 'SRG_GAT':
        model = m.SRG_GAT(rgcn_in_feats=args['in_feats'],
                          rgcn_out_feats=args['embedding_size'],
                          rgcn_num_blocks=args['num_b'],
                          rgcn_dropout=args['drop_out'],
                          han_num_meta_path=args['num_meta_path'],
                          han_in_feats=args['in_feats'],
                          han_hidden_feats=args['embedding_size'],
                          han_head_list=args['head_list'],
                          han_dropout=args['drop_out'],
                          fc_hidden_feats=args['fc_units']).to(args['device'])
    elif args['model'] == 'SRG_no_GRU':
        model = m.SRG_no_GRU(gcn_in_feats=args['in_feats'],
                             gcn_out_feats=args['embedding_size'],
                             gcn_num_layers=args['num_l'],
                             han_num_meta_path=args['num_meta_path'],
                             han_in_feats=args['in_feats'],
                             han_hidden_feats=args['embedding_size'],
                             han_head_list=args['head_list'],
                             han_dropout=args['drop_out'],
                             fc_hidden_feats=args['fc_units']).to(args['device'])
    elif args['model'] == 'SRG_Res':
        model = m.SRG_Res(gcn_in_feats=args['in_feats'],
                          gcn_out_feats=args['embedding_size'],
                          gcn_num_layers=args['num_l'],
                          han_num_meta_path=args['num_meta_path'],
                          han_in_feats=args['in_feats'],
                          han_hidden_feats=args['embedding_size'],
                          han_head_list=args['head_list'],
                          han_dropout=args['drop_out'],
                          fc_hidden_feats=args['fc_units']).to(args['device'])
    elif args['model'] == 'SRG_no_GCN':
        model = m.SRG_no_GCN(han_num_meta_path=args['num_meta_path'],
                             han_in_feats=args['in_feats'],
                             han_hidden_feats=args['embedding_size'],
                             han_head_list=args['head_list'],
                             han_dropout=args['drop_out'],
                             fc_hidden_feats=args['fc_units']).to(args['device'])
    else:
        raise ValueError('wrong name of the model')
    model.load_state_dict(t.load(args['model_path']))

    # log
    log = []
    mae, rmse = u.evaluate(model, g_homo, feat1, g_list, feat2, pairs, labels, val_mask)
    early_stop = u.EarlyStopping(
        args['model_path'], patience=args['patience'], rmse=rmse, mae=mae)

    # loss, optimizer
    loss_func = t.nn.MSELoss()
    optimizer = t.optim.Adam(
        model.parameters(), lr=args['lr'], weight_decay=args['decay'])

    # train
    for epoch in range(args['epochs']):
        dt = datetime.now()
        model.train()
        y_pred = model(g_homo, feat1, g_list, feat2, pairs)
        loss = loss_func(y_pred[train_mask], labels[train_mask])
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_mae, train_rmse = u.metrics(
            y_pred[train_mask].detach(), labels[train_mask])
        val_mae, val_rmse = u.evaluate(
            model, g_homo, feat1, g_list, feat2, pairs, labels, val_mask)
        stop = early_stop.step(val_rmse, val_mae, model)
        elapse = str(datetime.now() - dt)[:10] + '\n'
        log.append(' '.join(str(x) for x in (epoch, train_mae, train_rmse,
                                             val_mae, val_rmse, elapse)))
        print(f'epoch={epoch} | train_MAE={train_mae} | train_RMSE={train_rmse} | '
              f'val_MAE={val_mae} | val_RMSE={val_rmse} | elapse={elapse}')
        if stop:
            break

    early_stop.load_checkpoint(model)
    test_mae, test_rmse = u.evaluate(
        model, g_homo, feat1, g_list, feat2, pairs, labels, test_mask)
    print(f'test_MAE={test_mae} | test_RMSE={test_rmse}')

    # save log
    with open(args['log_path'], 'a') as f:
        f.writelines(log)
def train(args):
    # get configs
    epochs = args.epoch
    dim = args.dim
    lr = args.lr
    weight_decay = args.l2
    head_num = args.head_num
    device = args.device
    act = args.act
    fusion = args.fusion
    beta = args.beta
    alpha = args.alpha
    use_self = args.use_self
    agg = args.agg

    model = DATE(leaf_num, importer_size, item_size,
                 dim, head_num,
                 fusion_type=fusion, act=act, device=device,
                 use_self=use_self, agg_type=agg).to(device)
    model = nn.DataParallel(model, device_ids=[0, 1])

    # initialize parameters
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    # optimizer & loss
    optimizer = Ranger(model.parameters(), weight_decay=weight_decay, lr=lr)
    cls_loss_func = nn.BCELoss()
    reg_loss_func = nn.MSELoss()

    # save best model
    global_best_score = 0
    model_state = None

    # early stop settings
    stop_rounds = 3
    no_improvement = 0
    current_score = None

    for epoch in range(epochs):
        for step, (batch_feature, batch_user, batch_item, batch_cls, batch_reg) in enumerate(train_loader):
            model.train()  # prep to train model
            batch_feature, batch_user, batch_item, batch_cls, batch_reg = \
                batch_feature.to(device), batch_user.to(device), batch_item.to(device), \
                batch_cls.to(device), batch_reg.to(device)
            batch_cls, batch_reg = batch_cls.view(-1, 1), batch_reg.view(-1, 1)

            # model output
            classification_output, regression_output, hidden_vector = model(
                batch_feature, batch_user, batch_item)

            # FGSM attack
            adv_vector = fgsm_attack(model, cls_loss_func, hidden_vector, batch_cls, 0.01)
            adv_output = model.module.pred_from_hidden(adv_vector)

            # calculate loss
            adv_loss_func = nn.BCELoss(weight=batch_cls)
            adv_loss = beta * adv_loss_func(adv_output, batch_cls)
            cls_loss = cls_loss_func(classification_output, batch_cls)
            revenue_loss = alpha * reg_loss_func(regression_output, batch_reg)
            loss = cls_loss + revenue_loss + adv_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (step + 1) % 1000 == 0:
                print("CLS loss:%.4f, REG loss:%.4f, ADV loss:%.4f, Loss:%.4f"
                      % (cls_loss.item(), revenue_loss.item(), adv_loss.item(), loss.item()))

        # evaluate
        model.eval()
        print("Validate at epoch %s" % (epoch + 1))
        y_prob, val_loss = model.module.eval_on_batch(valid_loader)
        y_pred_tensor = torch.tensor(y_prob).float().to(device)
        best_threshold, val_score, roc = torch_threshold(y_prob, xgb_validy)
        overall_f1, auc, precisions, recalls, f1s, revenues = metrics(
            y_prob, xgb_validy, revenue_valid)
        select_best = np.mean(f1s)
        print("Over-all F1:%.4f, AUC:%.4f, F1-top:%.4f" % (overall_f1, auc, select_best))

        print("Evaluate at epoch %s" % (epoch + 1))
        y_prob, val_loss = model.module.eval_on_batch(test_loader)
        y_pred_tensor = torch.tensor(y_prob).float().to(device)
        overall_f1, auc, precisions, recalls, f1s, revenues = metrics(
            y_prob, xgb_testy, revenue_test, best_thresh=best_threshold)
        print("Over-all F1:%.4f, AUC:%.4f, F1-top:%.4f" % (overall_f1, auc, np.mean(f1s)))

        # save best model
        if select_best > global_best_score:
            global_best_score = select_best
            torch.save(model, model_path)

        # early stopping
        if current_score is None:
            current_score = select_best
            continue
        if select_best < current_score:
            current_score = select_best
            no_improvement += 1
        if no_improvement >= stop_rounds:
            print("Early stopping...")
            break
        if select_best > current_score:
            no_improvement = 0
            current_score = None
def main(raw_args=None):
    parser = argparse.ArgumentParser(
        description="Hyperspectral image classification with FixMatch")
    parser.add_argument('--patch_size', type=int, default=5,
                        help='Size of patch around each pixel taken for classification')
    parser.add_argument('--center_pixel', action='store_false',
                        help='use if you only want to consider the label of the center pixel of a patch')
    parser.add_argument('--batch_size', type=int, default=10,
                        help='Size of each batch for training')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of total epochs of training to run')
    parser.add_argument('--dataset', type=str, default='Salinas',
                        help='Name of dataset to run, Salinas or PaviaU')
    parser.add_argument('--cuda', type=int, default=-1,
                        help='what CUDA device to run on, -1 defaults to cpu')
    parser.add_argument('--warmup', type=float, default=0, help='warmup epochs')
    parser.add_argument('--save', action='store_true',
                        help='use to save model weights when running')
    parser.add_argument('--test_stride', type=int, default=1,
                        help='length of stride when sliding patch window over image for testing')
    parser.add_argument('--sampling_percentage', type=float, default=0.3,
                        help='percentage of dataset to sample for training (labeled and unlabeled included)')
    parser.add_argument('--sampling_mode', type=str, default='nalepa',
                        help='how to sample data: disjoint, random, nalepa, or fixed')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='initial learning rate')
    parser.add_argument('--alpha', type=float, default=1.0,
                        help='beta distribution range')
    parser.add_argument('--class_balancing', action='store_false',
                        help='use to balance weights according to ratio in dataset')
    parser.add_argument('--checkpoint', type=str, default=None,
                        help='use to load model weights from a certain directory')
    # Augmentation arguments
    parser.add_argument('--flip_augmentation', action='store_true',
                        help='use to flip augmentation data for use')
    parser.add_argument('--radiation_augmentation', action='store_true',
                        help='use to radiation noise data for use')
    parser.add_argument('--mixture_augmentation', action='store_true',
                        help='use to mixture noise data for use')
    parser.add_argument('--pca_augmentation', action='store_true',
                        help='use to pca augment data for use')
    parser.add_argument('--pca_strength', type=float, default=1.0,
                        help='Strength of the PCA augmentation, defaults to 1.')
    parser.add_argument('--cutout_spatial', action='store_true',
                        help='use to cutout spatial for data augmentation')
    parser.add_argument('--cutout_spectral', action='store_true',
                        help='use to cutout spectral for data augmentation')
    parser.add_argument('--augmentation_magnitude', type=int, default=1,
                        help='Magnitude of augmentation (so far only for cutout). Defaults to 1, min 1 and max 10.')
    parser.add_argument('--spatial_combinations', action='store_true',
                        help='use to spatially combine for data augmentation')
    parser.add_argument('--spectral_mean', action='store_true',
                        help='use to spectral mean for data augmentation')
    parser.add_argument('--moving_average', action='store_true',
                        help='use to spectral moving average for data augmentation')
    parser.add_argument('--results', type=str, default='results',
                        help='where to save results to (default results)')
    parser.add_argument('--save_dir', type=str, default='/saves/',
                        help='where to save models to (default /saves/)')
    parser.add_argument('--data_dir', type=str, default='/data/',
                        help='where to fetch data from (default /data/)')
    parser.add_argument('--load_file', type=str, default=None,
                        help='which file to load weights from (default None)')
    parser.add_argument('--fold', type=int, default=0,
                        help='Which fold to sample from if using Nalepas validation scheme')
    parser.add_argument('--sampling_fixed', type=str, default='True',
                        help='Use to sample a fixed amount of samples for each class from Nalepa sampling')
    parser.add_argument('--samples_per_class', type=int, default=10,
                        help='Amount of samples to sample for each class when sampling a fixed amount. Defaults to 10.')
    parser.add_argument('--supervision', type=str, default='full',
                        help='check this more, use to make use of all labeled or not, full or semi')

    args = parser.parse_args(raw_args)

    device = utils.get_device(args.cuda)
    args.device = device

    # vis = visdom.Visdom()
    vis = None

    tensorboard_dir = str(args.results + '/' +
                          datetime.datetime.now().strftime("%m-%d-%X"))
    os.makedirs(tensorboard_dir, exist_ok=True)
    writer = SummaryWriter(tensorboard_dir)

    if args.sampling_mode == 'nalepa':
        train_img, train_gt, test_img, test_gt, label_values, ignored_labels, \
            rgb_bands, palette = get_patch_data(args.dataset, args.patch_size,
                                                target_folder=args.data_dir,
                                                fold=args.fold)
        args.n_bands = train_img.shape[-1]
    else:
        img, gt, label_values, ignored_labels, rgb_bands, palette = get_dataset(
            args.dataset, target_folder=args.data_dir)
        args.n_bands = img.shape[-1]

    args.n_classes = len(label_values) - len(ignored_labels)
    args.ignored_labels = ignored_labels

    if palette is None:
        # Generate color palette
        palette = {0: (0, 0, 0)}
        for k, color in enumerate(sns.color_palette("hls", len(label_values) - 1)):
            palette[k + 1] = tuple(np.asarray(255 * np.array(color), dtype='uint8'))
    invert_palette = {v: k for k, v in palette.items()}

    def convert_to_color(x):
        return utils.convert_to_color_(x, palette=palette)

    def convert_from_color(x):
        return utils.convert_from_color_(x, palette=invert_palette)

    if args.sampling_mode == 'nalepa':
        print("{} samples selected (over {})".format(
            np.count_nonzero(train_gt),
            np.count_nonzero(train_gt) + np.count_nonzero(test_gt)))
        writer.add_text(
            'Amount of training samples',
            "{} samples selected (over {})".format(np.count_nonzero(train_gt),
                                                   np.count_nonzero(test_gt)))
        utils.display_predictions(convert_to_color(test_gt), vis, writer=writer,
                                  caption="Test ground truth")
    else:
        train_gt, test_gt = utils.sample_gt(gt, args.sampling_percentage,
                                            mode=args.sampling_mode)
        print("{} samples selected (over {})".format(np.count_nonzero(train_gt),
                                                     np.count_nonzero(gt)))
        writer.add_text(
            'Amount of training samples',
            "{} samples selected (over {})".format(np.count_nonzero(train_gt),
                                                   np.count_nonzero(gt)))
        utils.display_predictions(convert_to_color(train_gt), vis, writer=writer,
                                  caption="Train ground truth")
        utils.display_predictions(convert_to_color(test_gt), vis, writer=writer,
                                  caption="Test ground truth")

    model = HamidaEtAl(args.n_bands, args.n_classes, patch_size=args.patch_size)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                          nesterov=True, weight_decay=0.0005)
    # loss_labeled = nn.CrossEntropyLoss(weight=weights)
    # loss_unlabeled = nn.CrossEntropyLoss(weight=weights, reduction='none')

    if args.sampling_mode == 'nalepa':
        # Get fixed amount of random samples for validation
        idx_sup, idx_val, idx_unsup = get_pixel_idx(train_img, train_gt,
                                                    args.ignored_labels,
                                                    args.patch_size)

        if args.sampling_fixed == 'True':
            unique_labels = np.zeros(len(label_values))
            new_idx_sup = []
            index = 0
            for p, x, y in idx_sup:
                label = train_gt[p, x, y]
                if unique_labels[label] < args.samples_per_class:
                    unique_labels[label] += 1
                    new_idx_sup.append([p, x, y])
                    # NB: np.delete returns a copy; this call has no in-place effect
                    np.delete(idx_sup, index)
                index += 1
            idx_unsup = np.concatenate((idx_sup, idx_unsup))
            idx_sup = np.asarray(new_idx_sup)

        writer.add_text(
            'Amount of labeled training samples',
            "{} samples selected (over {})".format(idx_sup.shape[0],
                                                   np.count_nonzero(train_gt)))
        train_labeled_gt = [train_gt[p_l, x_l, y_l] for p_l, x_l, y_l in idx_sup]

        samples_class = np.zeros(args.n_classes)
        for c in np.unique(train_labeled_gt):
            samples_class[c - 1] = np.count_nonzero(train_labeled_gt == c)
        writer.add_text('Labeled samples per class', str(samples_class))
        print('Labeled samples per class: ' + str(samples_class))

        val_dataset = HyperX_patches(train_img, train_gt, idx_val,
                                     labeled='Val', **vars(args))
        val_loader = data.DataLoader(val_dataset, batch_size=args.batch_size)

        train_dataset = HyperX_patches(train_img, train_gt, idx_sup,
                                       labeled=True, **vars(args))
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       # pin_memory=True,
                                       num_workers=5,
                                       shuffle=True,
                                       drop_last=True)

        amount_labeled = idx_sup.shape[0]
    else:
        train_labeled_gt, val_gt = utils.sample_gt(train_gt, 0.95,
                                                   mode=args.sampling_mode)

        val_dataset = HyperX(img, val_gt, labeled='Val', **vars(args))
        val_loader = data.DataLoader(val_dataset, batch_size=args.batch_size)

        writer.add_text(
            'Amount of labeled training samples',
            "{} samples selected (over {})".format(
                np.count_nonzero(train_labeled_gt), np.count_nonzero(train_gt)))
        samples_class = np.zeros(args.n_classes)
        for c in np.unique(train_labeled_gt):
            samples_class[c - 1] = np.count_nonzero(train_labeled_gt == c)
        writer.add_text('Labeled samples per class', str(samples_class))

        train_dataset = HyperX(img, train_labeled_gt, labeled=True, **vars(args))
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       pin_memory=True,
                                       num_workers=5,
                                       shuffle=True,
                                       drop_last=True)

        utils.display_predictions(convert_to_color(train_labeled_gt), vis,
                                  writer=writer,
                                  caption="Labeled train ground truth")
        utils.display_predictions(convert_to_color(val_gt), vis, writer=writer,
                                  caption="Validation ground truth")

        amount_labeled = np.count_nonzero(train_labeled_gt)

    args.iterations = amount_labeled // args.batch_size
    args.total_steps = args.iterations * args.epochs
    args.scheduler = get_cosine_schedule_with_warmup(
        optimizer, args.warmup * args.iterations, args.total_steps)

    if args.class_balancing:
        weights_balance = utils.compute_imf_weights(train_gt, len(label_values),
                                                    args.ignored_labels)
        args.weights = torch.from_numpy(weights_balance[1:])
        args.weights = args.weights.to(torch.float32)
    else:
        weights = torch.ones(args.n_classes)
        # weights[torch.LongTensor(args.ignored_labels)] = 0
        args.weights = weights
    args.weights = args.weights.to(args.device)

    criterion = nn.CrossEntropyLoss(weight=args.weights)
    loss_val = nn.CrossEntropyLoss(weight=args.weights)

    print(args)
    print("Network :")
    writer.add_text('Arguments', str(args))
    with torch.no_grad():
        for input, _ in train_loader:
            break
        # summary(model.to(device), input.size()[1:])
        # writer.add_graph(model.to(device), input)
        # We would like to use device=hyperparams['device'] although we have
        # to wait for torchsummary to be fixed first.

    if args.load_file is not None:
        model.load_state_dict(torch.load(args.load_file))
    model.zero_grad()

    try:
        train(model, optimizer, criterion, loss_val, train_loader, writer,
              args, val_loader=val_loader, display=vis)
    except KeyboardInterrupt:
        # Allow the user to stop the training
        pass

    if args.sampling_mode == 'nalepa':
        probabilities = test(model, test_img, args)
    else:
        probabilities = test(model, img, args)
    prediction = np.argmax(probabilities, axis=-1)

    run_results = utils.metrics(prediction, test_gt,
                                ignored_labels=args.ignored_labels,
                                n_classes=args.n_classes)

    mask = np.zeros(test_gt.shape, dtype='bool')
    for l in args.ignored_labels:
        mask[test_gt == l] = True
    prediction += 1
    prediction[mask] = 0

    color_prediction = convert_to_color(prediction)
    utils.display_predictions(color_prediction, vis,
                              gt=convert_to_color(test_gt), writer=writer,
                              caption="Prediction vs. test ground truth")

    utils.show_results(run_results, vis, writer=writer,
                       label_values=label_values)

    writer.close()

    return run_results
def fit(self): """ * Fits the model during x `nb_epochs`. * If dev data is available, dev scores will be calculated after each epoch. * If test data is available, test scores are calculated after the fitting process. * Only the model weights which reach the highest hyphenation accuracy are eventually stored. """ if not hasattr(self, 'model'): self.build_model() train_inputs = {'char_input': self.X_train} train_outputs = {'out': self.Y_train} if hasattr(self, 'X_dev'): dev_inputs = {'char_input': self.X_dev} best_acc = [0.0, 0] for e in range(self.nb_epochs): print('-> epoch', e + 1) self.model.fit(train_inputs, train_outputs, nb_epoch = 1, shuffle = True, batch_size = self.batch_size, verbose=1) preds = self.model.predict(train_inputs, batch_size = self.batch_size, verbose=0) token_acc, hyphen_acc = utils.metrics(utils.pred_to_classes(self.Y_train), utils.pred_to_classes(preds)) print('\t- train scores:') print('\t\t + token acc:', round(token_acc, 2)) print('\t\t + hyphen acc:', round(hyphen_acc, 2)) if hasattr(self, 'X_dev'): preds = self.model.predict(dev_inputs, batch_size = self.batch_size, verbose=0) token_acc, hyphen_acc = utils.metrics(utils.pred_to_classes(self.Y_dev), utils.pred_to_classes(preds)) print('\t- dev scores:') print('\t\t + token acc:', round(token_acc, 2)) print('\t\t + hyphen acc:', round(hyphen_acc, 2)) if hyphen_acc > best_acc[0]: print('\t-> saving weights') self.model.save_weights(\ os.sep.join([self.model_dir, 'weights.h5']), overwrite=True) best_acc = [hyphen_acc, e] # make sure we have the best weights: print('-> Optimal dev hyphenation accuracy:', round(best_acc[0],2), 'at epoch #', best_acc[1]) self.model.load_weights(os.sep.join([self.model_dir, 'weights.h5'])) if hasattr(self, 'X_test'): test_inputs = {'char_input': self.X_test} preds = self.model.predict(test_inputs, batch_size = self.batch_size, verbose=0) token_acc, hyphen_acc = utils.metrics(utils.pred_to_classes(self.Y_test), utils.pred_to_classes(preds)) print('\t- test scores:') print('\t\t + token acc:', round(token_acc, 2)) print('\t\t + hyphen acc:', round(hyphen_acc, 2))
current_pos = 0
for i in range(batch_iter):
    items, labels, h_list, t_list, r_list, mem_len_list, current_pos = \
        model.next_batch(train_data, current_pos)
    feed_dict = construct_feeds(model, items, labels, h_list, t_list,
                                r_list, mem_len_list)
    fetchs = [model.train_op, model.loss]
    _, loss = model.sess.run(fetchs, feed_dict)
sys.stdout.flush()
## -------------------------------------------------------------------

## ----------------------- Evaluation ---------------------------------
preds = []
y_true = []
batch_iter = int(np.ceil(len(test_data) / model.batch_size))
current_pos = 0
for i in range(batch_iter):
    items, labels, h_list, t_list, r_list, mem_len_list, current_pos = \
        model.next_batch(test_data, current_pos)
    y_true += labels
    feed_dict = construct_feeds(model, items, labels, h_list, t_list,
                                r_list, mem_len_list)
    fetchs = [model.probs, model.loss]
    probs, loss = model.sess.run(fetchs, feed_dict)
    preds.extend(list(probs))

acc, f1, auc = utils.metrics(y_pred=np.array(preds), y_true=np.array(y_true))
print("Evaluation step %d: acc: %f, f1: %f, auc: %f" % (step, acc, f1, auc))
print("Evaluation ratio: %f" % (len(y_true) / len(test_data)))
sys.stdout.flush()
except KeyboardInterrupt:
    # Allow the user to stop the training
    pass

probabilities = test(model, img, hyperparams)
prediction = np.argmax(probabilities, axis=-1)

prediction = prediction[(PATCH_SIZE // 2):-(PATCH_SIZE // 2),
                        (PATCH_SIZE // 2):-(PATCH_SIZE // 2)]
test_gt = test_gt[(PATCH_SIZE // 2):-(PATCH_SIZE // 2),
                  (PATCH_SIZE // 2):-(PATCH_SIZE // 2)]
gt = gt[(PATCH_SIZE // 2):-(PATCH_SIZE // 2),
        (PATCH_SIZE // 2):-(PATCH_SIZE // 2)]

run_results = metrics(prediction, test_gt,
                      ignored_labels=hyperparams['ignored_labels'],
                      n_classes=N_CLASSES)

mask = np.zeros(gt.shape, dtype='bool')
for l in IGNORED_LABELS:
    mask[gt == l] = True
prediction[mask] = 0

color_prediction = convert_to_color(prediction)
display_predictions(color_prediction, viz, gt=convert_to_color(gt),
                    caption="Prediction vs. ground truth")

results.append(run_results)
show_results(run_results, viz, label_values=LABEL_VALUES)
def metrics(self, *args, **kwargs):
    # thin wrapper delegating to the module-level metrics function
    return metrics(*args, **kwargs)