Esempio n. 1
0
def inference(sql_db_model, input_image_np):
    """Segment a single image with a stored 2-class Resnet18_8s model.

    The model weights are read from ``sql_db_model.model_path``, resolved
    relative to ``models_save_folder_path``. The network is rebuilt and the
    weights are reloaded on every call, and the whole pipeline runs on the
    GPU (``.cuda()`` is called unconditionally).

    Parameters
    ----------
    sql_db_model : object
        Record exposing a ``model_path`` attribute (path of the saved
        ``state_dict`` relative to ``models_save_folder_path``).
    input_image_np : numpy.ndarray
        Image array accepted by ``PIL.Image.fromarray``. It is center-cropped
        to 1024x1280 before being fed to the network.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array holding the per-pixel argmax class index multiplied
        by 100 (i.e. values 0 or 100 for the two classes). Per the original
        author's note the layout is (h, w).
    """

    relative_model_path = sql_db_model.model_path
    full_model_path = os.path.join(models_save_folder_path,
                                   relative_model_path)

    valid_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.CenterCrop((1024, 1280)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Add the batch dimension the network expects and move to the GPU.
    img = valid_transform(input_image_np).unsqueeze(0).cuda()

    fcn = resnet_dilated.Resnet18_8s(num_classes=2)
    fcn.load_state_dict(torch.load(full_model_path))
    fcn.cuda()
    fcn.eval()

    # Inference only: disabling autograd avoids keeping the activations of
    # a full-resolution forward pass alive for a backward pass that never
    # happens.
    with torch.no_grad():
        logits = fcn(img)

    # Drop the batch dimension and take the argmax over the class dimension.
    # (h, w)
    _, labels = logits.squeeze(0).max(0)

    # Temporarily multiplying by 100 for contrast.
    # ``astype`` already returns a fresh array, so no extra copy is needed.
    res_np = labels.cpu().numpy().astype(np.uint8) * 100

    return res_np
Esempio n. 2
0
def _mean_iou_over_loader(model, loader, labels):
    """Return the mean intersection-over-union of ``model`` over ``loader``.

    The model is switched to eval mode for the pass and put back into
    train mode afterwards (also when the pass raises).

    Parameters
    ----------
    model : torch.nn.Module
        Network living on the GPU; called on each image batch.
    loader : iterable
        Yields ``(image, annotation)`` pairs of CPU tensors.
    labels : list of int
        Class indices over which the confusion matrix is built.

    Returns
    -------
    float
        Mean of the per-class IoU values. A class that appears neither in
        the annotations nor in the predictions yields a 0/0 division and
        therefore a NaN result.
    """

    class_count = len(labels)

    # Start from zeros so an empty loader does not crash the accumulation.
    overall_confusion_matrix = np.zeros((class_count, class_count),
                                        dtype=np.int64)

    model.eval()

    try:
        # No gradients are needed for evaluation; skipping them keeps the
        # memory footprint of the forward passes small.
        with torch.no_grad():

            for image, annotation in loader:

                logits = model(image.cuda())

                # First we do argmax on gpu and then transfer it to cpu
                _, prediction = logits.max(1)

                prediction_np = prediction.cpu().numpy().flatten()
                annotation_np = annotation.numpy().flatten()

                # Only values listed in ``labels`` are counted by sklearn,
                # so a mask-out value in the annotation is ignored here.
                overall_confusion_matrix += confusion_matrix(
                    y_true=annotation_np,
                    y_pred=prediction_np,
                    labels=labels)
    finally:
        model.train()

    intersection = np.diag(overall_confusion_matrix)
    ground_truth_set = overall_confusion_matrix.sum(axis=1)
    predicted_set = overall_confusion_matrix.sum(axis=0)
    union = ground_truth_set + predicted_set - intersection

    intersection_over_union = intersection / union.astype(np.float32)

    return np.mean(intersection_over_union)


def run(sql_db_model):
    """Train a 2-class Resnet18_8s segmentation network on Endovis 2017.

    The hyper-parameters are read from ``sql_db_model``; the function takes
    no other arguments. Training runs for 10 epochs with Adam
    (weight decay 1e-4) and a summed cross-entropy loss over the pixels whose
    annotation differs from the mask-out value 255. After every epoch the
    mean IoU is measured on the validation set and on a random-order subset
    of at most 223 training samples, both results are reported to the
    ``Experiment`` object, and the weights are saved whenever the validation
    mean IoU improves.

    Requires a CUDA device and PyTorch >= 0.4 (``torch.no_grad`` and
    ``Tensor.item`` are used).

    Parameters
    ----------
    sql_db_model : object
        Record exposing the attributes read here:

        batch_size : int
            Size of a batch to use during training.
        learning_rate : float
            Learning rate used by the optimization algorithm.
        gpu_id : int
            Written to the ``CUDA_VISIBLE_DEVICES`` environment variable.

        NOTE(review): the original docstring also listed ``output_stride``
        (32/16/8), but the network is hard-coded to ``Resnet18_8s`` and the
        value was never used.

    Returns
    -------
    str
        The literal ``'success'`` once training has finished.
    """

    batch_size = sql_db_model.batch_size
    learning_rate = sql_db_model.learning_rate
    gpu_id = sql_db_model.gpu_id

    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    experiment = Experiment(sql_db_model)

    number_of_classes = 2

    labels = list(range(number_of_classes))

    dataset_root = (
        '/home/daniil/.pytorch-segmentation-detection/datasets/endovis_2017')

    # Each inner list holds [image transform, annotation transform];
    # ``None`` leaves that element untouched.
    train_transform = ComposeJoint([
        # Crop to the actual view of the endoscope camera
        [
            transforms.CenterCrop((1024, 1280)),
            transforms.CenterCrop((1024, 1280))
        ],
        RandomHorizontalFlipJoint(),
        RandomCropJoint(crop_size=(224, 224)),
        [transforms.ToTensor(), None],
        [
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            None
        ],
        [
            None,
            transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long())
        ]
    ])

    trainset = Endovis_Instrument_2017(root=dataset_root,
                                       dataset_type=0,
                                       joint_transform=train_transform)

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4)

    # Same pipeline as for training, minus the random flip and random crop.
    valid_transform = ComposeJoint([
        [
            transforms.CenterCrop((1024, 1280)),
            transforms.CenterCrop((1024, 1280))
        ],
        [transforms.ToTensor(), None],
        [
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            None
        ],
        [
            None,
            transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long())
        ]
    ])

    valset = Endovis_Instrument_2017(root=dataset_root,
                                     dataset_type=0,
                                     joint_transform=valid_transform,
                                     train=False)

    valset_loader = torch.utils.data.DataLoader(valset,
                                                batch_size=1,
                                                shuffle=False,
                                                num_workers=1)

    # Subset of the training set used to track the training-set MIoU.
    # ``range`` works on both Python 2 and 3; the size is clamped so a
    # smaller dataset does not produce out-of-range indices.
    train_subset_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        range(min(223, len(trainset))))
    train_subset_loader = torch.utils.data.DataLoader(
        dataset=trainset,
        batch_size=1,
        sampler=train_subset_sampler,
        num_workers=2)

    fcn = resnet_dilated.Resnet18_8s(num_classes=number_of_classes)
    fcn.cuda()
    fcn.train()

    # ``size_average=False`` sums the per-pixel losses instead of averaging.
    criterion = nn.CrossEntropyLoss(size_average=False).cuda()

    optimizer = optim.Adam(fcn.parameters(),
                           lr=learning_rate,
                           weight_decay=0.0001)

    best_validation_score = 0

    # The training loss is reported as an average over this many iterations.
    loss_report_interval = 2

    epochs = range(10)

    for epoch in epochs:  # loop over the dataset multiple times

        running_loss = 0.0

        for i, data in enumerate(trainloader, 0):

            # get the inputs
            img, anno = data

            # We need to flatten annotations and logits to apply index of valid
            # annotations. All of this is because pytorch doesn't have tf.gather_nd()
            anno_flatten = flatten_annotations(anno)
            index = get_valid_annotations_index(anno_flatten,
                                                mask_out_value=255)
            anno_flatten_valid = torch.index_select(anno_flatten, 0, index)

            img = img.cuda()
            anno_flatten_valid = anno_flatten_valid.cuda()
            index = index.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            logits = fcn(img)
            logits_flatten = flatten_logits(
                logits, number_of_classes=number_of_classes)
            logits_flatten_valid = torch.index_select(logits_flatten, 0, index)

            loss = criterion(logits_flatten_valid, anno_flatten_valid)

            loss.backward()

            optimizer.step()

            # Only plain Python floats go into the reported results: a gpu
            # tensor stored there once got pickled and unpickled later on,
            # taking gpu memory and causing unobvious problems.
            # The summed loss is normalised by the number of valid pixels;
            # ``max(..., 1)`` guards against a batch with no valid pixel.
            valid_pixel_count = max(logits_flatten_valid.size(0), 1)
            running_loss += loss.item() / valid_pixel_count

            if i % loss_report_interval == loss_report_interval - 1:

                experiment.add_next_iteration_results(
                    training_loss=running_loss / loss_report_interval)

                running_loss = 0.0

        current_validation_score = _mean_iou_over_loader(
            fcn, valset_loader, labels)
        current_train_validation_score = _mean_iou_over_loader(
            fcn, train_subset_loader, labels)

        experiment.add_next_iteration_results(
            training_accuracy=current_train_validation_score,
            validation_accuracy=current_validation_score)

        # Save the model if it has a better MIoU score.
        if current_validation_score > best_validation_score:

            model_save_path = experiment.get_best_model_file_save_path()
            torch.save(fcn.state_dict(), model_save_path)
            best_validation_score = current_validation_score
            experiment.update_best_iteration_results(
                validation_accuracy=current_validation_score)

    print('Finished Training')
    print('Best validation score is: ' + str(best_validation_score))

    return 'success'
Esempio n. 3
0
    intersection = np.diag(overall_confusion_matrix)
    ground_truth_set = overall_confusion_matrix.sum(axis=1)
    predicted_set = overall_confusion_matrix.sum(axis=0)
    union = ground_truth_set + predicted_set - intersection

    intersection_over_union = intersection / union.astype(np.float32)
    mean_intersection_over_union = np.mean(intersection_over_union)

    fcn.train()

    return mean_intersection_over_union


# Script entry point: build the network, load stored weights, replace the
# final layer with a fresh 3-output one, and move the model to the gpu in
# training mode.
if __name__ == '__main__':
    # Built with 21 output classes — presumably to match the layer shapes
    # stored in the checkpoint loaded below (TODO confirm).
    fcn = resnet_dilated.Resnet18_8s(num_classes=21)
    fcn.load_state_dict(torch.load('resnet_18_8s_59.pth'))

    # The wrapped network whose final layer is replaced below.
    res = fcn.resnet18_8s

    # Freeze every existing parameter of the wrapped network.
    for param in res.parameters():
        param.requires_grad = False

    # New final layer: 1x1 convolution from ``res.inplanes`` input channels
    # to 3 output channels, with weights drawn from N(0, 0.01) and zero bias.
    res.fc = nn.Conv2d(res.inplanes, 3, 1)
    res.fc.weight.data.normal_(0, 0.01)
    res.fc.bias.data.zero_()
    # Explicitly mark the new layer's parameters as trainable; they are the
    # only ones left with ``requires_grad`` set after the freeze above.
    for param in res.fc.parameters():
        param.requires_grad = True

    fcn.cuda()
    fcn.train()