def calculate_dice_coefficient(self,
                               sess,
                               input_paths,
                               target_mask_paths,
                               dataset,
                               num_samples=100,
                               plot=False,
                               print_to_screen=False):
    """
    Calculates the dice coefficient score for a dataset, represented by a
    list of {input_paths} and {target_mask_paths}.

    Only examples whose dice coefficient is non-negative are counted; a
    negative score is skipped (presumably the sentinel that
    utils.dice_coefficient returns for an undefined score -- confirm there).

    Inputs:
    - sess: A TensorFlow Session object.
    - input_paths: A list of Python strs that represent pathnames to input
      image files.
    - target_mask_paths: A list of Python strs that represent pathnames to
      target mask files.
    - dataset: A Python str that represents the dataset being tested.
      Options: {train,dev}. Just for logging purposes.
    - num_samples: A Python int that represents the number of counted
      samples to test. If num_samples=None, then test whole dataset.
    - plot: A Python bool. If True, saves a "Predicted" vs. "Target" figure
      for each counted example under <FLAGS.train_dir>/examples.
    - print_to_screen: A Python bool. If True, logs the dice coefficient of
      each counted example.

    Outputs:
    - dice_coefficient: A Python float that represents the average dice
      coefficient across the counted examples, or 0.0 (with a logged
      warning) when no example was counted.
    """
    logging.info(f"Calculating dice coefficient for {num_samples} examples "
                 f"from {dataset}...")
    tic = time.time()

    dice_coefficient_total = 0.
    num_examples = 0

    sbg = SliceBatchGenerator(input_paths,
                              target_mask_paths,
                              self.FLAGS.batch_size,
                              shape=(self.FLAGS.slice_height,
                                     self.FLAGS.slice_width),
                              use_fake_target_masks=self.FLAGS.use_fake_target_masks)
    for batch in sbg.get_batch():
        predicted_masks = self.get_predicted_masks_for_batch(sess, batch)

        zipped_masks = zip(predicted_masks,
                           batch.target_masks_batch,
                           batch.input_paths_batch,
                           batch.target_mask_path_lists_batch)
        for predicted_mask, target_mask, input_path, _ in zipped_masks:
            dice_coefficient = utils.dice_coefficient(predicted_mask,
                                                      target_mask)
            if dice_coefficient >= 0.0:
                dice_coefficient_total += dice_coefficient
                num_examples += 1

                if print_to_screen:
                    # Whee! We predicted at least one lesion pixel!
                    logging.info(f"Dice coefficient of valid example {num_examples}: "
                                 f"{dice_coefficient}")
                if plot:
                    f, axarr = plt.subplots(1, 2)
                    f.suptitle(input_path)
                    axarr[0].imshow(predicted_mask)
                    axarr[0].set_title("Predicted")
                    axarr[1].imshow(target_mask)
                    axarr[1].set_title("Target")
                    examples_dir = os.path.join(self.FLAGS.train_dir,
                                                "examples")
                    # exist_ok avoids the check-then-create race.
                    os.makedirs(examples_dir, exist_ok=True)
                    f.savefig(os.path.join(examples_dir,
                                           str(num_examples).zfill(4)))
                    # Release the figure; otherwise one figure per example
                    # stays alive for the lifetime of the process.
                    plt.close(f)

            if num_samples is not None and num_examples >= num_samples:
                break

        # The inner break only leaves the per-example loop; stop batching too.
        if num_samples is not None and num_examples >= num_samples:
            break

    toc = time.time()
    logging.info(f"Calculating dice coefficient took {toc-tic} sec.")

    if num_examples == 0:
        # Nothing was counted, so the mean is undefined; avoid dividing by 0.
        logging.warning(f"No valid examples found in {dataset}; "
                        f"returning a dice coefficient of 0.0.")
        return 0.0
    return dice_coefficient_total / num_examples
ori_imgs[index,...][np.squeeze(thresholded_vessel, axis=0)==0]=(0,0,0) Image.fromarray(ori_imgs[index,...].astype(np.uint8)).save(os.path.join(vessels_dir,os.path.basename(filenames[index]))) # compare with the ground truth comp_dir=comparison_out.format(os.path.basename(dataset),os.path.basename(result)) if not os.path.isdir(comp_dir): os.makedirs(comp_dir) for index in range(gt_vessels.shape[0]): diff_map=utils.difference_map(gt_vessels[index,...], pred_vessels[index,...], masks[index,...]) Image.fromarray(diff_map.astype(np.uint8)).save(os.path.join(comp_dir,os.path.basename(filenames[index]))) # skip the ground truth if "1st_manual" not in result: # print metrics print "-- {} --".format(os.path.basename(result)) print "dice coefficient : {}".format(utils.dice_coefficient(gt_vessels,pred_vessels, masks)) print "f1 score : {}, accuracy : {}, specificity : {}, sensitivity : {}".format(*utils.misc_measures(gt_vessels,pred_vessels, masks)) # compute false positive rate, true positive graph method=os.path.basename(result) methods.append(method) if method=='CRFs' or method=='2nd_manual': cm=confusion_matrix(gt_vessels_in_mask, pred_vessels_in_mask) fpr=1-1.*cm[0,0]/(cm[0,1]+cm[0,0]) tpr=1.*cm[1,1]/(cm[1,0]+cm[1,1]) prec=1.*cm[1,1]/(cm[0,1]+cm[1,1]) recall=tpr else: fpr, tpr, _ = roc_curve(gt_vessels_in_mask, pred_vessels_in_mask) prec, recall, _ = precision_recall_curve(gt_vessels_in_mask, pred_vessels_in_mask) fprs.append(fpr)
for index in range(gt_vessels.shape[0]): diff_map, dice_coeff = utils.difference_map( gt_vessels[index, ...], pred_vessels[index, ...], masks[index, ...]) dice_list.append(dice_coeff) Image.fromarray(diff_map.astype(np.uint8)).save( os.path.join(comp_dir, os.path.basename(filenames[index]))) # print "indices of best dice coeff : {}".format(sorted(range(len(dice_list)),key=lambda k: dice_list[k])) # skip the ground truth if "1st_manual" not in result: # print metrics print "-- {} --".format(os.path.basename(result)) print "dice coefficient : {}".format( utils.dice_coefficient(gt_vessels, pred_vessels, masks)) print "f1 score : {}, accuracy : {}, sensitivity : {}, specificity : {}".format( *utils.misc_measures_evaluation(gt_vessels, pred_vessels, masks)) # compute false positive rate, true positive graph method = os.path.basename(result) methods.append(method) if method == 'CRFs' or method == '2nd_manual': cm = confusion_matrix(gt_vessels_in_mask, pred_vessels_in_mask) fpr = 1 - 1. * cm[0, 0] / (cm[0, 1] + cm[0, 0]) tpr = 1. * cm[1, 1] / (cm[1, 0] + cm[1, 1]) prec = 1. * cm[1, 1] / (cm[0, 1] + cm[1, 1]) recall = tpr if method == '2nd_manual':
def calculate_dice_coefficient(self,
                               sess,
                               input_paths,
                               target_mask_paths,
                               dataset,
                               num_samples=100,
                               plot=False,
                               print_to_screen=False):
    """
    Calculates the dice coefficient score for a dataset, represented by a
    list of {input_paths} and {target_mask_paths}.

    Only examples whose dice coefficient is non-negative are counted; a
    negative score is skipped (presumably the sentinel that
    utils.dice_coefficient returns for an undefined score -- confirm there).

    When FLAGS.mode is 'eval', {plot} is True and {num_samples} is an int
    below 200, a bar chart comparing predicted and target mask sizes is also
    saved to <FLAGS.train_dir>/relative_sizes.

    Inputs:
    - sess: A TensorFlow Session object.
    - input_paths: A list of Python strs that represent pathnames to input
      image files.
    - target_mask_paths: A list of Python strs that represent pathnames to
      target mask files.
    - dataset: A Python str that represents the dataset being tested.
      Options: {train,dev}. Just for logging purposes.
    - num_samples: A Python int that represents the number of counted
      samples to test. If num_samples=None, then test whole dataset (and
      skip the mask-size chart).
    - plot: A Python bool. If True, saves a "Predicted" vs. "Target" figure
      for each counted example under <FLAGS.train_dir>/examples.
    - print_to_screen: A Python bool. If True, logs the dice coefficient of
      each counted example.

    Outputs:
    - dice_coefficient: A Python float that represents the average dice
      coefficient across the counted examples, or 0.0 (with a logged
      warning) when no example was counted.
    """
    logging.info(f"Calculating dice coefficient for {num_samples} examples "
                 f"from {dataset}...")
    tic = time.time()

    dice_coefficient_total = 0.
    num_examples = 0

    # To be used to save mask sizes for comparison
    predicted_mask_sizes = []
    target_mask_sizes = []

    sbg = SliceBatchGenerator(input_paths,
                              target_mask_paths,
                              self.FLAGS.batch_size,
                              shape=(self.FLAGS.slice_height,
                                     self.FLAGS.slice_width),
                              use_fake_target_masks=self.FLAGS.use_fake_target_masks)
    for batch in sbg.get_batch():
        predicted_masks = self.get_predicted_masks_for_batch(sess, batch)

        zipped_masks = zip(predicted_masks,
                           batch.target_masks_batch,
                           batch.input_paths_batch,
                           batch.target_mask_path_lists_batch)
        for predicted_mask, target_mask, input_path, _ in zipped_masks:
            dice_coefficient = utils.dice_coefficient(predicted_mask,
                                                      target_mask)
            if dice_coefficient >= 0.0:
                dice_coefficient_total += dice_coefficient
                num_examples += 1

                if print_to_screen:
                    # Whee! We predicted at least one lesion pixel!
                    logging.info(f"Dice coefficient of valid example {num_examples}: "
                                 f"{dice_coefficient}")
                if plot:
                    if self.FLAGS.mode == 'eval':
                        # Save mask sizes for comparison
                        predicted_mask_sizes.append(np.sum(predicted_mask))
                        target_mask_sizes.append(np.sum(target_mask))
                    f, axarr = plt.subplots(1, 2)
                    f.suptitle(input_path)
                    axarr[0].imshow(predicted_mask)
                    axarr[0].set_title("Predicted")
                    axarr[1].imshow(target_mask)
                    axarr[1].set_title("Target")
                    examples_dir = os.path.join(self.FLAGS.train_dir,
                                                "examples")
                    # exist_ok avoids the check-then-create race.
                    os.makedirs(examples_dir, exist_ok=True)
                    f.savefig(os.path.join(examples_dir,
                                           str(num_examples).zfill(4)))
                    # Release the figure; otherwise one figure per example
                    # stays alive for the lifetime of the process.
                    plt.close(f)

            if num_samples is not None and num_examples >= num_samples:
                break

        # The inner break only leaves the per-example loop; stop batching too.
        if num_samples is not None and num_examples >= num_samples:
            break

    # Sizes are only recorded while plotting in eval mode, so the chart is
    # drawn from what was actually recorded rather than from num_examples.
    # num_samples=None (whole dataset) is treated as "too many bars".
    if (num_samples is not None and num_samples < 200
            and self.FLAGS.mode == 'eval' and predicted_mask_sizes):
        predicted_sizes = np.array(predicted_mask_sizes)
        target_sizes = np.array(target_mask_sizes)
        # Order both series by ascending predicted size.
        order = predicted_sizes.argsort()
        predicted_sizes = predicted_sizes[order]
        target_sizes = target_sizes[order]

        fig, ax = plt.subplots()
        ind = 2 * np.arange(len(predicted_sizes))
        rects1 = ax.bar(ind, predicted_sizes, 0.5, color='r')
        rects2 = ax.bar(ind + 0.5, target_sizes, 0.5, color='b')
        ax.set_ylabel('Size (in pixels)')
        ax.set_title('Predicted and Target Mask Sizes')
        ax.set_xticks(ind + 0.25)
        ax.set_xticklabels(0.5 * ind)
        ax.legend((rects1[0], rects2[0]), ('Predicted', 'Target'))
        fig.savefig(os.path.join(self.FLAGS.train_dir, 'relative_sizes'))
        plt.close(fig)

    toc = time.time()
    logging.info(f"Calculating dice coefficient took {toc-tic} sec.")

    if num_examples == 0:
        # Nothing was counted, so the mean is undefined; avoid dividing by 0.
        logging.warning(f"No valid examples found in {dataset}; "
                        f"returning a dice coefficient of 0.0.")
        return 0.0
    return dice_coefficient_total / num_examples
def train(self, A, B, EPOCHS=100, BATCH_SIZE=128, WARMUP_STEP=20, NUM_IMG=5):
    """Run the adversarial training loop over the paired datasets A and B.

    For every (epoch, batch) step the method:
      1. trains discriminatorA/discriminatorB on a batch of real samples
         (label 1) concatenated with generator output (label 0);
      2. trains pioneerA/pioneerB on the same latent batch against the
         "real" labels;
      3. once ``epoch > WARMUP_STEP``, additionally trains successorA/B and
         coordinatorA/B and combines their losses into ``mse``.
    Losses are written to TensorBoard, checkpoints are saved periodically,
    and every 100 steps generated / reconstructed samples are collected.
    After the loops the collected samples are saved as ``.npy`` files in the
    current working directory, a final checkpoint is written and the
    matching index (``match_cnt / total_cnt``) is printed.

    Args:
        A: Samples of domain A, indexed as ``A[idx, :, :, :]`` (so 4-D;
            presumably images laid out as (N, H, W, C) -- TODO confirm).
        B: Samples of domain B, indexed with the same ``idx`` as ``A``.
        EPOCHS: Number of passes over the data.
        BATCH_SIZE: Number of samples per batch.
        WARMUP_STEP: Epoch index after which the successor/coordinator
            models start training.
        NUM_IMG: Number of latent vectors sampled each time generated
            images are stored.

    Returns:
        None. Results are reported through TensorBoard, checkpoints, the
        saved ``.npy`` files and stdout.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below (in particular of the progress prints
    and of the final save/evaluation section) is the most plausible
    reconstruction and should be confirmed against the original file.
    """
    # Define the groundtruth
    Y_real = np.ones((BATCH_SIZE, 1))
    Y_rec = np.zeros((BATCH_SIZE, 1))  # Reconstructed Label
    Y_both = np.concatenate((Y_real, Y_rec), axis=0)

    # Log for TensorBoard
    summary_writer = tf.summary.create_file_writer(self.log_dir)

    # Initialize the checkpoint
    # Only used below as part of the step-based checkpoint period.
    interval = int(EPOCHS // 5) if EPOCHS >= 10 else 5
    checkpoint_path = os.path.join(self.checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(E_optimizerA=self.E_optA,
                                     G_optimizerA=self.G_optA,
                                     D_optimizerA=self.D_optA,
                                     E_optimizerB=self.E_optB,
                                     G_optimizerB=self.G_optB,
                                     D_optimizerB=self.D_optB,
                                     encoderA=self.encoderA,
                                     generatorA=self.generatorA,
                                     discriminatorA=self.discriminatorA,
                                     pioneerA=self.pioneerA,
                                     successorA=self.successorA,
                                     coordinatorA=self.coordinatorA,
                                     encoderB=self.encoderB,
                                     generatorB=self.generatorB,
                                     discriminatorB=self.discriminatorB,
                                     pioneerB=self.pioneerB,
                                     successorB=self.successorB,
                                     coordinatorB=self.coordinatorB)
    # Restore the latest checkpoint in checkpoint_dir
    checkpoint.restore(tf.train.latest_checkpoint(self.checkpoint_dir))

    A_gen_list, B_gen_list, AB_rec_list = [], [], []
    match_cnt, total_cnt = 0, 0  # Initialize the counting for matching DSC
    NUM_BATCH = len(A) // BATCH_SIZE  # np.ceil()
    for epoch in range(EPOCHS):
        # NOTE(review): NUM_BATCH-1 means the last full batch is never
        # visited -- confirm this is intentional.
        for nb in range(NUM_BATCH-1):
            # ---Pretrain Stage ---
            # Select real instances batch by batch
            step = int(epoch * NUM_BATCH + nb)
            idx = np.arange(nb*BATCH_SIZE, nb*BATCH_SIZE+BATCH_SIZE)
            A_real = A[idx, :, :, :]
            # Generate a batch of latent variables based on uniform distribution
            z_A = np.random.uniform(-1.0, 1.0, size=[BATCH_SIZE, self.latent_dim])
            A_gen = self.generatorA.predict(z_A)
            # Train the discriminator (real for 1 and rec for 0) ------
            A_both = np.concatenate((A_real, A_gen))
            Dloss_A = self.discriminatorA.train_on_batch(A_both, Y_both)
            # Train the pioneer model to fool the discriminator ------
            Gloss_A = self.pioneerA.train_on_batch(z_A, Y_real)

            # Repeat the same procedure as above for B
            B_real = B[idx, :, :, :]
            z_B = np.random.uniform(-1.0, 1.0, size=[BATCH_SIZE, self.latent_dim])
            B_gen = self.generatorB.predict(z_B)
            B_both = np.concatenate((B_real, B_gen))
            Dloss_B = self.discriminatorB.train_on_batch(B_both, Y_both)
            Gloss_B = self.pioneerB.train_on_batch(z_B, Y_real)

            # Train the successor & coordinator when epoch > WARMUP_STEP ------
            # mse == -1 is the sentinel for "still warming up"; it is checked
            # again by the logging and sampling code below.
            mse = -1
            if epoch > WARMUP_STEP:
                mseA_gen = self.successorA.train_on_batch(B_real, A_gen)
                mseB_gen = self.successorB.train_on_batch(A_real, B_gen)
                mseA_real = self.successorA.train_on_batch(B_real, A_real)
                mseB_real = self.successorB.train_on_batch(A_real, B_real)
                mse_B2B = self.coordinatorA.train_on_batch(B_real, B_real)
                mse_A2A = self.coordinatorB.train_on_batch(A_real, A_real)
                mse_A = 0.5 * np.add(mseA_real, mseA_gen)
                mse_B = 0.5 * np.add(mseB_real, mseB_gen)
                identity_loss = 0.5 * np.add(mse_A, mse_B)
                pair_matched_loss = 0.5 * np.add(mse_A2A, mse_B2B)
                mse = np.mean([identity_loss, pair_matched_loss], axis=0)

            # For Experiments and Visualization ---------------------
            # Save scalars into TensorBoard
            with summary_writer.as_default():
                # Index 0 / 1 of the discriminator result are logged as
                # loss / accuracy respectively.
                tf.summary.scalar('D_loss_A', Dloss_A[0], step=step)
                tf.summary.scalar('G_loss_A', Gloss_A[0], step=step)
                tf.summary.scalar('D_acc_A', Dloss_A[1], step=step)
                tf.summary.scalar('D_loss_B', Dloss_B[0], step=step)
                tf.summary.scalar('G_loss_B', Gloss_B[0], step=step)
                tf.summary.scalar('D_acc_B', Dloss_B[1], step=step)
                if mse != -1:
                    tf.summary.scalar('MSE_A', mse_A, step=step)
                    tf.summary.scalar('MSE_B', mse_B, step=step)
                    tf.summary.scalar('Identity_Loss', identity_loss, step=step)
                    tf.summary.scalar('Pair_Matched_Loss', pair_matched_loss, step=step)
                    tf.summary.scalar('MSE', mse, step=step)

            # Save the checkpoint at given interval
            # NOTE(review): the period is interval*BATCH_SIZE *steps*, not
            # epochs -- confirm this is the intended cadence.
            if (step + 1) % int(interval*BATCH_SIZE) == 0:
                checkpoint.save(file_prefix=str(checkpoint_path))

            # Schedule the learning rate
            if (epoch + 1) % 100000 == 0:
                self.lr = self.lr_scheduler(self.lr, Type="Periodic", epoch=epoch, period=100000)
                # Keras callback: https://keras.io/zh/callbacks/

            # Store the generated/reconstructed samples
            if (step + 1) % 100 == 0:
                # NOTE(review): sampled from a normal distribution here, while
                # training draws the latent batch from uniform(-1, 1).
                z_gen = np.random.normal(size=(int(NUM_IMG), self.latent_dim))
                A_gen_list.append(self.prediction(self.generatorA, z_gen))
                B_gen_list.append(self.prediction(self.generatorB, z_gen))

            if mse != -1 and (step + 1) % 100 == 0:
                # Prediction
                A_rec = self.prediction(self.successorA, B_real)
                B_rec = self.prediction(self.successorB, A_real)
                for i in range(len(idx)):
                    total_cnt += 1
                    # Reshape image to 2D size
                    A_rec_i = A_rec[i].reshape(self.img_shape[0], self.img_shape[1])
                    B_rec_i = B_rec[i].reshape(self.img_shape[0], self.img_shape[1])
                    # Get the binary masks of images
                    # Thresholds are 60% / 30% of each image's own maximum.
                    A_rec_i_bi = convert2binary(A_rec_i, A_rec_i.max()*0.6)
                    B_rec_i_bi = convert2binary(B_rec_i, B_rec_i.max()*0.3)
                    # Compute the DSC
                    DSC = dice_coefficient(A_rec_i_bi, B_rec_i_bi)
                    # Compute matching_index & select rec samples
                    # NOTE(review): a *low* DSC is what counts as a match
                    # here -- confirm the direction of this comparison.
                    if DSC < 0.2:
                        match_cnt += 1
                        AB_rec_list.append([A_rec[i], B_rec[i]])

            # Plot the progress
            print("A: No.{0}: D_loss: {1}; D_acc: {2}; G_loss: {3}."\
                  .format(step, Dloss_A[0], Dloss_A[1], Gloss_A[0]))
            print("B: No.{0}: D_loss: {1}; D_acc: {2}; G_loss: {3}."\
                  .format(step, Dloss_B[0], Dloss_B[1], Gloss_B[0]))
            if mse != -1:
                print("Total MSE: {0}.".format(mse))
            print("----------")

    # Save file
    # Paths are relative, so the arrays land in the current working directory.
    np.save("./A_gen_baait.npy", A_gen_list)
    np.save("./B_gen_baait.npy", B_gen_list)
    np.save("./AB_rec_baait.npy", AB_rec_list)
    checkpoint.save(file_prefix = str(checkpoint_path))

    # Evaluation
    print("Evaluation:")
    # total_cnt stays 0 when no reconstruction pass ran; skip the ratio then.
    if total_cnt != 0:
        matching_index = match_cnt/total_cnt
        print("Matching Index: ", matching_index)
def train(trainDataLoader, validDataLoader, net, optimizer, criterion, use_gpu,
          epochs=50, checkpoint_path='SUMNet.pt'):
    """Train ``net`` and track loss / dice per epoch.

    Each epoch runs one optimisation pass over ``trainDataLoader`` followed
    by one gradient-free evaluation pass over ``validDataLoader``.
    Predictions are obtained by thresholding the network output at 0.5.
    The model weights are saved to ``checkpoint_path`` whenever the summed
    validation dice of an epoch exceeds the best value seen so far.

    Args:
        trainDataLoader: Iterable yielding ``(inputs, labels)`` training
            batches.
        validDataLoader: Iterable yielding ``(inputs, labels)`` validation
            batches.
        net: The model; called as ``net(inputs)``.
        optimizer: Optimizer stepping the parameters of ``net``.
        criterion: Loss called on the flattened outputs and labels.
        use_gpu: If true, each batch is moved to the GPU with ``.cuda()``.
        epochs: Number of epochs to train for (default 50).
        checkpoint_path: File the best ``state_dict`` is written to
            (default ``'SUMNet.pt'``).

    Returns:
        A ``pandas.DataFrame`` with one row per epoch and the columns
        ``'Train Loss'``, ``'Valid Loss'``, ``'Train Dice'`` and
        ``'Valid Dice'`` (per-batch means).

    Raises:
        ValueError: If either loader yields no batches, since the per-epoch
            means would be undefined.
    """
    trainLoss = []
    validLoss = []
    trainDiceCoeff = []
    validDiceCoeff = []
    start = time.time()
    bestValidDice = 0

    for epoch in range(epochs):
        epochStart = time.time()
        trainRunningLoss = 0
        validRunningLoss = 0
        trainBatches = 0
        validBatches = 0
        trainDice = 0
        validDice = 0

        # ---- Training pass ----
        net.train(True)
        for inputs, labels in tqdm.tqdm(trainDataLoader):
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            probs = net(inputs)
            loss = criterion(probs.view(-1), labels.view(-1))
            preds = (probs > 0.5).float()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            trainRunningLoss += loss.item()
            trainDice += dice_coefficient(preds, labels).item()
            trainBatches += 1

        if trainBatches == 0:
            raise ValueError('trainDataLoader yielded no batches')
        trainLoss.append(trainRunningLoss / trainBatches)
        trainDiceCoeff.append(trainDice / trainBatches)

        # ---- Validation pass ----
        net.train(False)
        # No backward pass happens here, so skip building autograd graphs.
        with torch.no_grad():
            for inputs, labels in validDataLoader:
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                probs = net(inputs)
                loss = criterion(probs.view(-1), labels.view(-1))
                preds = (probs > 0.5).float()

                validDice += dice_coefficient(preds, labels).item()
                validRunningLoss += loss.item()
                validBatches += 1

        if validBatches == 0:
            raise ValueError('validDataLoader yielded no batches')
        validLoss.append(validRunningLoss / validBatches)
        validDiceCoeff.append(validDice / validBatches)

        # Compared on the per-epoch sum of batch scores, not the mean.
        if validDice > bestValidDice:
            bestValidDice = validDice
            torch.save(net.state_dict(), checkpoint_path)

        epochEnd = time.time() - epochStart
        print('Epoch: {:.0f}/{:.0f} | Train Loss: {:.3f} | Valid Loss: {:.3f} | Train Dice: {:.3f} | Valid Dice: {:.3f}'
              .format(epoch + 1, epochs,
                      trainRunningLoss / trainBatches,
                      validRunningLoss / validBatches,
                      trainDice / trainBatches,
                      validDice / validBatches))
        print('Time: {:.0f}m {:.0f}s'.format(epochEnd // 60, epochEnd % 60))

    end = time.time() - start
    print('Training completed in {:.0f}m {:.0f}s'.format(end // 60, end % 60))

    DF = pd.DataFrame({
        'Train Loss': np.array(trainLoss),
        'Valid Loss': np.array(validLoss),
        'Train Dice': np.array(trainDiceCoeff),
        'Valid Dice': np.array(validDiceCoeff)
    })
    return DF
def train(trainDataLoader, validDataLoader, net, optimizer, scheduler,
          criterion, use_gpu, epochs=20, checkpoint_path='SUMNet_3channel.pt'):
    """Train ``net`` and track loss / dice per epoch.

    Each epoch runs one optimisation pass over ``trainDataLoader`` followed
    by one gradient-free evaluation pass over ``validDataLoader``.
    Predictions are obtained by thresholding the network output at 0.5.
    The current learning rate and batch loss are sent to ``vis`` after every
    training batch. The learning rate of every parameter group is halved
    after the training pass of each epoch whose index is a positive multiple
    of 5. The model weights are saved to ``checkpoint_path`` whenever the
    summed validation dice of an epoch is at least the best value so far.

    Args:
        trainDataLoader: Iterable yielding ``(inputs, labels)`` training
            batches.
        validDataLoader: Iterable yielding ``(inputs, labels)`` validation
            batches.
        net: The model; called as ``net(inputs)``.
        optimizer: Optimizer stepping the parameters of ``net``.
        scheduler: Accepted for interface compatibility but not used; the
            learning rate is decayed manually (see above).
        criterion: Loss called on the flattened outputs and labels.
        use_gpu: If true, each batch is moved to the GPU with ``.cuda()``.
        epochs: Number of epochs to train for (default 20).
        checkpoint_path: File the best ``state_dict`` is written to
            (default ``'SUMNet_3channel.pt'``).

    Returns:
        A ``pandas.DataFrame`` with one row per epoch and the columns
        ``'Train Loss'``, ``'Valid Loss'``, ``'Train Dice'`` and
        ``'Valid Dice'`` (per-batch means).

    Raises:
        ValueError: If either loader yields no batches, since the per-epoch
            means would be undefined.
    """
    trainLoss = []
    validLoss = []
    trainDiceCoeff = []
    validDiceCoeff = []
    start = time.time()
    bestValidDice = 0

    for epoch in range(epochs):
        epochStart = time.time()
        trainRunningLoss = 0
        validRunningLoss = 0
        trainBatches = 0
        validBatches = 0
        trainDice = 0
        validDice = 0

        # ---- Training pass ----
        net.train(True)
        bar = tqdm.tqdm(trainDataLoader)
        for inputs, labels in bar:
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            probs = net(inputs)
            loss = criterion(probs.view(-1), labels.view(-1))
            preds = (probs > 0.5).float()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batchLoss = loss.item()
            trainRunningLoss += batchLoss
            trainDice += dice_coefficient(preds, labels).item()
            trainBatches += 1

            vis.plot("lr", optimizer.state_dict()['param_groups'][0]['lr'])
            vis.plot("loss", batchLoss)
            bar.set_postfix(loss=batchLoss)

        # Manual step decay: applied once per qualifying epoch, after the
        # batch loop, so the rate is halved only once per epoch.
        if epoch > 0 and epoch % 5 == 0:
            for group in optimizer.param_groups:
                group['lr'] *= 0.5

        if trainBatches == 0:
            raise ValueError('trainDataLoader yielded no batches')
        trainLoss.append(trainRunningLoss / trainBatches)
        trainDiceCoeff.append(trainDice / trainBatches)

        # ---- Validation pass ----
        net.train(False)
        # No backward pass happens here, so skip building autograd graphs.
        with torch.no_grad():
            for inputs, labels in validDataLoader:
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                probs = net(inputs)
                loss = criterion(probs.view(-1), labels.view(-1))
                preds = (probs > 0.5).float()

                validDice += dice_coefficient(preds, labels).item()
                validRunningLoss += loss.item()
                validBatches += 1

        if validBatches == 0:
            raise ValueError('validDataLoader yielded no batches')
        validLoss.append(validRunningLoss / validBatches)
        validDiceCoeff.append(validDice / validBatches)

        # Compared on the per-epoch sum of batch scores, not the mean; ties
        # overwrite the checkpoint with the most recent weights.
        if validDice >= bestValidDice:
            bestValidDice = validDice
            torch.save(net.state_dict(), checkpoint_path)

        epochEnd = time.time() - epochStart
        print('Epoch: {:.0f}/{:.0f} | Train Loss: {:.3f} | Valid Loss: {:.3f} | Train Dice: {:.3f} | Valid Dice: {:.3f}'
              .format(epoch + 1, epochs,
                      trainRunningLoss / trainBatches,
                      validRunningLoss / validBatches,
                      trainDice / trainBatches,
                      validDice / validBatches))
        print('Time: {:.0f}m {:.0f}s'.format(epochEnd // 60, epochEnd % 60))

    end = time.time() - start
    print('Training completed in {:.0f}m {:.0f}s'.format(end // 60, end % 60))

    DF = pd.DataFrame({
        'Train Loss': np.array(trainLoss),
        'Valid Loss': np.array(validLoss),
        'Train Dice': np.array(trainDiceCoeff),
        'Valid Dice': np.array(validDiceCoeff)
    })
    return DF
'generated_mask_{epoch}.png'.format(epoch=epoch)), np.uint8(255 * (final * 0.5 + 0.5))) cv2.imshow("generated", final * 0.5 + 0.5) cv2.waitKey(10) if (epoch % validate_each == 0): dice_coeffs = [] counter = 0 for j, (inputs, gt_masks) in enumerate(validation_loader): netG.eval() inputs, gt_masks = inputs.to(device), gt_masks.to(device) pred_masks = netG(inputs) pred_masks_cpu = pred_masks.data.cpu().numpy() inputs_cpu = inputs.data.cpu().numpy() dice_coeff = utils.dice_coefficient( ((pred_masks * 0.5 + 0.5) > 0.5).float(), (gt_masks * 0.5 + 0.5).float()) dice_coeffs.append(dice_coeff.item()) mean_dice_coeffs = np.mean(np.array(dice_coeffs)) tq.set_postfix( loss=' D={:.5f}, G={:.5f}, validation dice score={:.5f}'. format(np.mean(D_losses), np.mean(G_losses), mean_dice_coeffs)) if (mean_dice_coeffs > best_mean_dice_coeffs): counter = 0 for j, (inputs, gt_masks) in enumerate(validation_loader): netG.eval() inputs, gt_masks = inputs.to(device), gt_masks.to(device) pred_masks = netG(inputs) pred_masks_cpu = pred_masks.data.cpu().numpy() inputs_cpu = inputs.data.cpu().numpy()