def __init__(self, debug=False):
    """Record the debug flag, start in "train" mode and build the joint
    tensor converter.

    Parameters
    ----------
    debug : bool
        Stored unchanged on ``self.debug``.
    """
    self.debug = debug
    self.mode = "train"

    # Two separate ToTensor instances, grouped as a single joint step.
    # Presumably one is applied to each element of an (image, annotation)
    # pair -- confirm against ComposeJoint.
    per_element_converters = [transforms.ToTensor(), transforms.ToTensor()]
    self.both_to_tensor = ComposeJoint([per_element_converters])
return annotations.view(-1) def get_valid_annotations_index(flatten_annotations, mask_out_value=255): return torch.squeeze( torch.nonzero((flatten_annotations != mask_out_value )), 1) number_of_classes = 21 labels = range(number_of_classes) train_transform = ComposeJoint( [ RandomHorizontalFlipJoint(), #[ResizeAspectRatioPreserve(greater_side_size=348), # ResizeAspectRatioPreserve(greater_side_size=348, interpolation=Image.NEAREST)], # RandomScaleJoint(low=0.9, high=1.1), # [CropOrPad(output_size=(348, 348)), CropOrPad(output_size=(348, 348), fill=255)], [transforms.ToTensor(), None], [transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), None], [None, transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long()) ] ]) trainset = PascalVOCSegmentation('datasets', download=False, joint_transform=train_transform) trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, shuffle=True, num_workers=2)
def _mean_iou(model, loader, labels):
    """Return the mean intersection-over-union of ``model`` over ``loader``.

    The model is switched to evaluation mode for the pass and put back into
    training mode before returning.

    Parameters
    ----------
    model : callable torch module
        Network producing per-class logits along dimension 1.
    loader : iterable
        Yields ``(image, annotation)`` batches. Images are moved to the GPU;
        annotations are converted with ``.numpy()`` and so must be CPU tensors.
    labels : sequence of int
        Class ids that make up the confusion matrix.

    Returns
    -------
    float
        Mean of the per-class IoU values. A class that never occurs in either
        the ground truth or the predictions has a zero union and therefore
        yields NaN, exactly as in the original computation.
    """
    model.eval()

    overall_confusion_matrix = None

    # Inference only: do not record autograd history.
    with torch.no_grad():
        for image, annotation in loader:
            logits = model(image.cuda())

            # Take the argmax on the GPU first and only then move the
            # resulting label map to the CPU.
            _, prediction = logits.max(1)

            prediction_np = prediction.cpu().numpy().flatten()
            annotation_np = annotation.numpy().flatten()

            # Restricting ``labels`` to the class ids makes sklearn leave out
            # pixels carrying any other value (e.g. a mask-out label).
            current_confusion_matrix = confusion_matrix(
                y_true=annotation_np,
                y_pred=prediction_np,
                labels=labels)

            if overall_confusion_matrix is None:
                overall_confusion_matrix = current_confusion_matrix
            else:
                overall_confusion_matrix += current_confusion_matrix

    intersection = np.diag(overall_confusion_matrix)
    ground_truth_set = overall_confusion_matrix.sum(axis=1)
    predicted_set = overall_confusion_matrix.sum(axis=0)
    union = ground_truth_set + predicted_set - intersection

    intersection_over_union = intersection / union.astype(np.float32)
    mean_intersection_over_union = np.mean(intersection_over_union)

    model.train()

    return mean_intersection_over_union


def run(sql_db_model):
    """Train a Resnet18_8s binary segmentation network on Endovis 2017.

    Trains for 10 epochs with Adam on random 224x224 crops. After every epoch
    the mean IoU is measured on the validation set and on a random subset of
    the training set; both are reported to the experiment, and the weights
    are saved whenever the validation score improves.

    Parameters
    ----------
    sql_db_model : object
        Experiment description. The attributes read here are:

        batch_size : int
            Size of a batch to use during training.
        learning_rate : float
            Learning rate used by the optimization algorithm.
        gpu_id : int
            Written to ``CUDA_VISIBLE_DEVICES`` before any CUDA call.

        The object is also handed to ``Experiment``. The model's
        ``output_stride`` is not consumed: the network is always
        ``Resnet18_8s``.

    Returns
    -------
    str
        ``'success'`` once training has finished.
    """
    batch_size = sql_db_model.batch_size
    learning_rate = sql_db_model.learning_rate
    gpu_id = sql_db_model.gpu_id

    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    experiment = Experiment(sql_db_model)

    number_of_classes = 2
    labels = list(range(number_of_classes))

    dataset_root = '/home/daniil/.pytorch-segmentation-detection/datasets/endovis_2017'

    train_transform = ComposeJoint([
        # Crop to the actual view of the endoscope camera
        [
            transforms.CenterCrop((1024, 1280)),
            transforms.CenterCrop((1024, 1280))
        ],
        RandomHorizontalFlipJoint(),
        RandomCropJoint(crop_size=(224, 224)),
        [transforms.ToTensor(), None],
        [
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            None
        ],
        [
            None,
            transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long())
        ]
    ])

    trainset = Endovis_Instrument_2017(
        root=dataset_root,
        dataset_type=0,
        joint_transform=train_transform)

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4)

    # Same pipeline as for training, minus the random flip and random crop.
    valid_transform = ComposeJoint([
        [
            transforms.CenterCrop((1024, 1280)),
            transforms.CenterCrop((1024, 1280))
        ],
        [transforms.ToTensor(), None],
        [
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            None
        ],
        [
            None,
            transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long())
        ]
    ])

    valset = Endovis_Instrument_2017(
        root=dataset_root,
        dataset_type=0,
        joint_transform=valid_transform,
        train=False)

    valset_loader = torch.utils.data.DataLoader(valset,
                                                batch_size=1,
                                                shuffle=False,
                                                num_workers=1)

    # Random subset of the training set (indices 0..222) used to track the
    # training MIoU. ``range`` works on both Python 2 and Python 3.
    train_subset_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        range(223))
    train_subset_loader = torch.utils.data.DataLoader(
        dataset=trainset,
        batch_size=1,
        sampler=train_subset_sampler,
        num_workers=2)

    fcn = resnet_dilated.Resnet18_8s(num_classes=number_of_classes)
    fcn.cuda()
    fcn.train()

    # The loss is summed over pixels; it is normalised by the number of
    # valid pixels when it is logged below.
    criterion = nn.CrossEntropyLoss(size_average=False).cuda()

    optimizer = optim.Adam(fcn.parameters(),
                           lr=learning_rate,
                           weight_decay=0.0001)

    best_validation_score = 0

    for epoch in range(10):  # loop over the dataset multiple times

        running_loss = 0.0

        for i, (img, anno) in enumerate(trainloader):

            # Annotations and logits are flattened so that the index of the
            # valid (not masked-out) annotations can be applied to both.
            anno_flatten = flatten_annotations(anno)
            index = get_valid_annotations_index(anno_flatten,
                                                mask_out_value=255)
            anno_flatten_valid = torch.index_select(anno_flatten, 0, index)

            img = img.cuda()
            anno_flatten_valid = anno_flatten_valid.cuda()
            index = index.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            logits = fcn(img)
            logits_flatten = flatten_logits(
                logits, number_of_classes=number_of_classes)
            logits_flatten_valid = torch.index_select(logits_flatten, 0, index)

            loss = criterion(logits_flatten_valid, anno_flatten_valid)
            loss.backward()
            optimizer.step()

            # ``item()`` yields a plain Python float, so no GPU tensor can
            # leak into whatever the experiment stores or pickles.
            # ``max(..., 1)`` guards against a batch without valid pixels.
            valid_pixel_count = logits_flatten_valid.size(0)
            running_loss += loss.item() / max(valid_pixel_count, 1)

            # Report the average per-pixel loss of every two iterations.
            if i % 2 == 1:
                experiment.add_next_iteration_results(
                    training_loss=running_loss / 2)
                running_loss = 0.0

        # Track the MIoU on the validation set and on the training subset.
        current_validation_score = _mean_iou(fcn, valset_loader, labels)
        current_train_validation_score = _mean_iou(
            fcn, train_subset_loader, labels)

        experiment.add_next_iteration_results(
            training_accuracy=current_train_validation_score,
            validation_accuracy=current_validation_score)

        # Save the model if it has a better MIoU score.
        if current_validation_score > best_validation_score:
            model_save_path = experiment.get_best_model_file_save_path()
            torch.save(fcn.state_dict(), model_save_path)

            best_validation_score = current_validation_score
            experiment.update_best_iteration_results(
                validation_accuracy=current_validation_score)

    print('Finished Training')
    print('Best validation score is: ' + str(best_validation_score))

    return 'success'
# [transforms.ToTensor(), None],
# [transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), None],
# [None, transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long()) ]
# ])
# trainset = PascalVOCSegmentation('datasets',
# download=False,
# joint_transform=train_transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=100,
# shuffle=True, num_workers=4)

# Validation pipeline. Each two-element list pairs a transform with ``None``,
# which leaves the corresponding element untouched (presumably
# [image, annotation] -- confirm against ComposeJoint).
valid_transform = ComposeJoint([
    [transforms.ToTensor(), None],
    [
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        None,
    ],
    [
        None,
        transforms.Lambda(
            lambda annotation: torch.from_numpy(np.asarray(annotation)).long()
        ),
    ],
])

valset = PascalVOCSegmentation(
    'datasets',
    train=False,
    download=False,
    joint_transform=valid_transform,
)

# One sample per batch, in dataset order, loaded in the main process.
valset_loader = torch.utils.data.DataLoader(
    valset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
)

# train_subset_sampler = torch.utils.data.sampler.SubsetRandomSampler(xrange(904))
# train_subset_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=1,
# sampler=train_subset_sampler,