예제 #1
0
def main_voc2007():
    """Train a 20-class ``vgg16_sp`` model via ``MultiLabelMAPEngine.multi_learning``.

    Parses the command line into the module-level ``args`` and records CUDA
    availability in the module-level ``use_gpu``. Optimizer hyper-parameters
    (``lr``, ``momentum``, ``weight_decay``) are passed to the engine through
    the ``state`` dict rather than through an optimizer object.

    Note: despite the function name and the ``logs/voc2007/`` save path, the
    datasets are built with ``Voc2012Classification`` ('train' / 'test').
    """
    global args, best_prec1, use_gpu
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # Datasets: training split and held-out split.
    class_count = 20
    training_set = Voc2012Classification(args.data, 'train')
    validation_set = Voc2012Classification(args.data, 'test')

    # Network and loss.
    network = vgg16_sp(class_count, pretrained=True)
    print(network)
    loss_fn = nn.MultiLabelSoftMarginLoss()

    # Everything the engine needs to know about the run, in one mapping.
    state = dict(
        batch_size=args.batch_size,
        max_epochs=args.epochs,
        image_size=args.image_size,
        evaluate=args.evaluate,
        resume=args.resume,
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        difficult_examples=True,
        save_model_path='logs/voc2007/',
    )

    MultiLabelMAPEngine(state).multi_learning(network, loss_fn, training_set,
                                              validation_set)
예제 #2
0
def main_voc2007():
    """Train a 20-class ``vgg16_sp`` model on VOC2007 with an SGD optimizer.

    Parses the command line into the module-level ``args`` and records CUDA
    availability in the module-level ``use_gpu``. The model is trained on the
    'trainval' split and validated on the 'test' split through
    ``MultiLabelMAPEngine.learning``. The checkpoint directory
    ``logs/voc2007/`` is created if it does not exist.
    """
    global args, best_prec1, use_gpu
    args = parser.parse_args()

    use_gpu = torch.cuda.is_available()

    # define dataset
    train_dataset = Voc2007Classification(args.data, 'trainval')
    val_dataset = Voc2007Classification(args.data, 'test')
    num_classes = 20

    # load model
    model = vgg16_sp(num_classes, pretrained=True)

    print(model)

    criterion = nn.MultiLabelSoftMarginLoss()
    # The model supplies its own parameter groups for the given base lr.
    optimizer = torch.optim.SGD(model.get_config_optim(args.lr),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    state = {
        'batch_size': args.batch_size,
        'image_size': args.image_size,
        'max_epochs': args.epochs,
        'evaluate': args.evaluate,
        'resume': args.resume,
        'difficult_examples': True,
        'save_model_path': 'logs/voc2007/',
    }

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    # (e.g. when several jobs start at the same time).
    os.makedirs(state['save_model_path'], exist_ok=True)

    engine = MultiLabelMAPEngine(state)
    engine.learning(model, criterion, train_dataset, val_dataset, optimizer)
예제 #3
0
def load_model_voc(model_path, multiscale=False, scale=224):
    """Load per-image-size ``vgg16_sp`` models from a checkpoint file.

    ``torch.load(model_path)`` must yield an iterable of dicts, each with a
    ``'state_dict'`` entry (weights) and an ``'image_size'`` entry (the key
    the resulting model is stored under).

    Args:
        model_path: Path passed to ``torch.load``.
        multiscale: When true, return a model for every image size found in
            the checkpoint. When false, return only the model for ``scale``.
        scale: Image size to select when ``multiscale`` is false.

    Returns:
        Dict mapping image size to a CUDA-resident model.

    Raises:
        KeyError: If ``multiscale`` is false and the checkpoint has no entry
            whose ``'image_size'`` equals ``scale``.

    Note: models are always moved to the GPU with ``.cuda()``, so a CUDA
    device is required.
    """
    model = models.vgg16_sp(20, False)
    checkpoints = torch.load(model_path)
    model_dict = {}
    available_sizes = []
    for entry in checkpoints:
        image_size = entry['image_size']
        available_sizes.append(image_size)
        # In single-scale mode, skip entries that would be thrown away: this
        # avoids deep-copying unused models onto the GPU.
        if not multiscale and image_size != scale:
            continue
        model.load_state_dict(entry['state_dict'])
        # Copy, because `model` is reused as the loading buffer for the next entry.
        model_dict[image_size] = deepcopy(model).cuda()

    if multiscale:
        return model_dict
    if scale not in model_dict:
        raise KeyError('no checkpoint entry for image_size={!r}; available: {!r}'
                       .format(scale, available_sizes))
    return {scale: model_dict[scale]}
예제 #4
0
def runObjDetection(data_dir, model_input_location, output_dir_class0,
                    output_dir_class1, output_dir_skip, categoryThreshold):
    """Classify each image in ``data_dir`` from a grid of regions at several scales.

    Every image is resized to each size in ``imageScales`` and tiled into
    250x250 regions. All regions are scored by the model in one batch; for
    classes 0 and 1 the highest region probability decides the outcome:

    * both maxima below ``categoryThreshold``: the image is counted as skipped;
    * class ``c`` maximum above ``categoryThreshold``: the full (largest-scale)
      image is saved to that class's output directory and the winning region
      is saved under its ``right/`` or ``wrong/`` subdirectory, depending on
      whether the folder label equals ``c``. An image may match both classes.

    Running statistics are printed after every image.

    Args:
        data_dir: Root directory in ``datasets.ImageFolder`` layout.
        model_input_location: Path to a state dict loadable into ``vgg16_sp(3)``.
        output_dir_class0: Output directory for images predicted as class 0.
        output_dir_class1: Output directory for images predicted as class 1.
        output_dir_skip: Directory created for skipped images (nothing is
            written there while the heatmap step is disabled).
        categoryThreshold: Probability a region must exceed for its class to
            be assigned.

    Returns:
        None.
    """
    imageScales = [1000, 750, 500,
                   250]  # how big will the whole image be in pixels
    imageMaxScale = max(imageScales)

    class regionCoordinate:
        """A square crop window (x, y, w, h) on the image resized to ``scale``."""

        def __init__(self, x, y, w, h, scale):
            self.x = x
            self.y = y
            self.w = w
            self.h = h
            self.scale = scale

        def __str__(self):
            return "x: {0}, y: {1}, w: {2}, h: {3}, scale: {4}".format(
                self.x, self.y, self.w, self.h, self.scale)

    network_input_width = 250  # width of each region cut from the image
    regionCoordinates = [
        regionCoordinate(x, y, network_input_width, network_input_width, s)
        for s in imageScales for x in range(0, s, network_input_width)
        for y in range(0, s, network_input_width)
    ]

    # Evaluation-style preprocessing only (no augmentation).
    data_transforms = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    dset = datasets.ImageFolder(data_dir)

    use_gpu = torch.cuda.is_available()

    model = vgg16_sp(3, pretrained=True)
    # Without a GPU, remap storages to CPU so a GPU-saved checkpoint still loads.
    map_location = None if use_gpu else (lambda storage, location: storage)
    model.load_state_dict(
        torch.load(model_input_location, map_location=map_location))
    if use_gpu:
        model = model.cuda()
    # Inference only: switch off training-time behaviour such as dropout.
    model.eval()

    def cropImageUsingBounds(image, coordinates):
        """Crop ``image`` to ``coordinates`` (left, upper, right, lower for Pillow)."""
        cropCoordinatesPillow = (coordinates.x, coordinates.y,
                                 coordinates.x + coordinates.w,
                                 coordinates.y + coordinates.h)
        return image.crop(cropCoordinatesPillow)

    def printCurrentStats(numProcessed, numRight, numWrong, numSkipped):
        """Print running right/wrong/skipped fractions and per-class counts."""
        print("percent right: " +
              str((numRight[0] + numRight[1]) / numProcessed))
        print("percent wrong: " +
              str((numWrong[0] + numWrong[1]) / numProcessed))
        print("percent skipped: " + str(numSkipped / numProcessed))
        print("class 0 right: " + str(numRight[0]))
        print("class 0 wrong: " + str(numWrong[0]))
        print("class 1 right: " + str(numRight[1]))
        print("class 1 wrong: " + str(numWrong[1]))
        print("skipped: " + str(numSkipped))

    # Only classes 0 and 1 are reported, in this order.
    classOutputDirs = ((0, output_dir_class0), (1, output_dir_class1))

    # The heatmap directories are kept for the CAM heatmap step, which is
    # currently disabled.
    for _, classDir in classOutputDirs:
        for subDir in ("right", "wrong", "heatmap"):
            os.makedirs(os.path.join(classDir, subDir), exist_ok=True)
    os.makedirs(output_dir_skip, exist_ok=True)

    numPoints = str(len(dset))
    numSkipped = 0
    numRight = {0: 0, 1: 0}
    numWrong = {0: 0, 1: 0}

    for i, (pil_image_unresized, labelIndex) in enumerate(dset):
        print("Working on element " + str(i) + " of " + numPoints, flush=True)
        # ImageFolder indexes items in the same order as dset.imgs.
        fileName = os.path.basename(dset.imgs[i][0])

        # One resized copy per scale, so each region is cut from the image at
        # the scale it was generated for.
        imagesByScale = {
            s: pil_image_unresized.resize((s, s))
            for s in imageScales
        }
        pil_image = imagesByScale[imageMaxScale]

        imageRegions = [
            cropImageUsingBounds(imagesByScale[region.scale], region)
            for region in regionCoordinates
        ]
        imageRegionsAsOneTensor = torch.stack(
            [data_transforms(croppedImage) for croppedImage in imageRegions])
        if use_gpu:
            imageRegionsAsOneTensor = imageRegionsAsOneTensor.cuda()
        inputs = Variable(imageRegionsAsOneTensor)

        # Rows are regions, columns are classes; normalise over classes.
        classProbabilityTensor = F.softmax(model(inputs), dim=1).data
        # max(0) reduces over regions: for each class, the highest probability
        # any region reached and the index of that region.
        maxProbabilities, maxIndices = classProbabilityTensor.max(0)
        print("max probabilites are " + str(maxProbabilities))

        if all(
                bool(maxProbabilities[classIndex] < categoryThreshold)
                for classIndex, _ in classOutputDirs):
            print("dropping image " + fileName +
                  " as probabilites were all less than " +
                  str(categoryThreshold))
            numSkipped += 1

        # Write to the folder of every class whose best region clears the
        # threshold; an image can land in both.
        for classIndex, classDir in classOutputDirs:
            if not bool(maxProbabilities[classIndex] > categoryThreshold):
                continue
            print("think image " + fileName + " is class " + str(classIndex) +
                  " as most likely object was: " + str(maxProbabilities))
            indexOfMostLikely = int(maxIndices[classIndex])
            pil_image.save(os.path.join(classDir, fileName))
            if labelIndex == classIndex:
                numRight[classIndex] += 1
                verdictDir = "right"
            else:
                numWrong[classIndex] += 1
                verdictDir = "wrong"
            imageRegions[indexOfMostLikely].save(
                os.path.join(classDir, verdictDir, fileName))

        printCurrentStats(i + 1, numRight, numWrong, numSkipped)
예제 #5
0

def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    """Set every param group's learning rate using a step-wise decay.

    The rate is ``init_lr`` multiplied by 0.1 once for each full block of
    ``lr_decay_epoch`` epochs that has elapsed. The new rate is printed on
    the first epoch of each block.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts.
        epoch: Zero-based epoch number.
        init_lr: Learning rate used during the first block.
        lr_decay_epoch: Number of epochs per block.

    Returns:
        The same ``optimizer`` object, updated in place.
    """
    decay_steps = epoch // lr_decay_epoch
    lr = init_lr * (0.1**decay_steps)

    starts_new_block = (epoch % lr_decay_epoch == 0)
    if starts_new_block:
        print('LR is set to {}'.format(lr))

    for group in optimizer.param_groups:
        group['lr'] = lr

    return optimizer


# Build the 20-class network so that the checkpoint's weights fit, then load them.
model = vgg16_sp(20, pretrained=True)
checkpoint = torch.load(model_checkpoint_location)
model.load_state_dict(checkpoint['state_dict'])
# All loaded weights stay trainable. To train only the new head instead, set
# requires_grad = False on model.parameters() here, before the head is replaced.

# Replace the classification head with one sized for the new dataset.
# NOTE(review): assumes the features fed to the classifier have 1024
# channels — confirm against vgg16_sp.
num_maps = 1024
model.classifier = nn.Sequential(nn.Dropout(0.5),
                                 nn.Linear(num_maps, len(dset_classes)))

# Bind model_ft on every path; previously it was only defined when a GPU was
# available, so CPU runs failed with NameError below.
model_ft = model.cuda() if use_gpu else model

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
예제 #6
0
def transferLearn(data_dir, model_checkpoint_location, label_map_file,
                  model_output_file):
    """Fine-tune a checkpointed ``vgg16_sp`` model on an image-folder dataset.

    The 20-class network is restored from ``model_checkpoint_location``, its
    classifier is replaced by a dropout + linear head sized for the classes
    found under ``data_dir/train``, and all parameters are trained for 25
    epochs with SGD. Training batches are drawn with a class-balancing
    weighted sampler. The weights with the best validation accuracy are
    saved to ``model_output_file``.

    Args:
        data_dir: Directory containing ``train`` and ``val`` subdirectories,
            each in ``datasets.ImageFolder`` layout.
        model_checkpoint_location: Path loadable with ``torch.load`` that
            yields a mapping with a ``'state_dict'`` entry.
        label_map_file: File to which the ``(index, class name)`` list is
            appended (opened in append mode, so repeated runs accumulate).
        model_output_file: Destination for the best model's ``state_dict``.

    Returns:
        None.
    """
    # Data augmentation and normalization for training
    # Just normalization for validation
    # NOTE(review): RandomSizedCrop / Scale are the older torchvision names;
    # kept as-is to match the torchvision version this file targets.
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            torchsample.transforms.RandomAffine(90, (0.5, 0.5), 90),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    dsets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }

    # Background on balanced sampling:
    # https://discuss.pytorch.org/t/how-to-enable-the-dataloader-to-sample-from-each-class-with-equal-probability/911
    # https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
    def make_weights_for_balanced_classes(images, nclasses):
        """Return one sampling weight per image, inversely proportional to
        the size of the image's class.

        ``images`` is a sequence of ``(path, class_index)`` pairs.
        """
        count = [0] * nclasses
        for _, class_index in images:
            count[class_index] += 1
        total = float(sum(count))
        # A class with no samples gets weight 0 instead of raising
        # ZeroDivisionError; no image refers to it anyway.
        weight_per_class = [
            total / float(n) if n else 0.0 for n in count
        ]
        return [weight_per_class[class_index] for _, class_index in images]

    weights = make_weights_for_balanced_classes(dsets['train'].imgs,
                                                len(dsets['train'].classes))
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        weights, len(weights))

    dset_loaders = {}
    # shuffle must stay False when a sampler is given; the sampler decides order.
    dset_loaders['train'] = torch.utils.data.DataLoader(dsets['train'],
                                                        batch_size=15,
                                                        shuffle=False,
                                                        sampler=sampler,
                                                        num_workers=5,
                                                        pin_memory=True)

    dset_loaders['val'] = torch.utils.data.DataLoader(dsets['val'],
                                                      batch_size=15,
                                                      shuffle=True,
                                                      num_workers=5,
                                                      pin_memory=True)

    dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
    dset_classes = dsets['train'].classes

    class_index_map = str(list(enumerate(dset_classes)))
    with open(label_map_file, 'a') as f:
        f.write(class_index_map)

    print("Classes to index mapping is" + class_index_map)

    use_gpu = torch.cuda.is_available()

    def as_python_number(value):
        """Return ``value`` as a plain Python number.

        Depending on the PyTorch version, reductions yield either a Python
        number or a single-element tensor; only the latter has ``.item()``.
        """
        return value.item() if hasattr(value, 'item') else value

    def train_model(model, criterion, optimizer, lr_scheduler, num_epochs):
        """Run ``num_epochs`` train/val epochs.

        Returns ``(best_model, best_acc)``: a deep copy of the model at its
        highest validation accuracy (or ``model`` itself if accuracy never
        rose above 0) and that accuracy.
        """
        since = time.time()

        best_model = model
        best_acc = 0.0

        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                is_training = phase == 'train'
                if is_training:
                    optimizer = lr_scheduler(optimizer, epoch)
                # train(True) enables training mode, train(False) evaluation mode
                model.train(is_training)

                running_loss = 0.0
                running_corrects = 0

                for inputs, labels in dset_loaders[phase]:
                    if use_gpu:
                        inputs, labels = inputs.cuda(), labels.cuda()
                    inputs, labels = Variable(inputs), Variable(labels)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                    # Accumulate plain Python numbers: loss.data[0] fails on
                    # 0-dim tensors, and a tensor-valued running_corrects
                    # would make the accuracy division tensor arithmetic.
                    if hasattr(loss, 'item'):
                        running_loss += loss.item()
                    else:
                        running_loss += loss.data[0]
                    running_corrects += as_python_number(
                        torch.sum(preds == labels.data))

                epoch_loss = running_loss / dset_sizes[phase]
                epoch_acc = float(running_corrects) / dset_sizes[phase]

                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc))

                # Snapshot the model whenever validation accuracy improves.
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model = copy.deepcopy(model)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Best val Acc: {:4f}'.format(best_acc))
        return (best_model, best_acc)

    def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
        """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs."""
        lr = init_lr * (0.1**(epoch // lr_decay_epoch))

        if epoch % lr_decay_epoch == 0:
            print('LR is set to {}'.format(lr))

        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        return optimizer

    # Build the 20-class network so the checkpoint's weights fit, then load them.
    model = vgg16_sp(20, pretrained=True)
    checkpoint = torch.load(model_checkpoint_location)
    model.load_state_dict(checkpoint['state_dict'])

    # Replace the classification head with one sized for this dataset.
    # NOTE(review): assumes the features fed to the classifier have 1024
    # channels — confirm against vgg16_sp.
    num_maps = 1024
    model.classifier = nn.Sequential(nn.Dropout(0.5),
                                     nn.Linear(num_maps, len(dset_classes)))

    # Bind model_ft on every path; previously it was only defined when a GPU
    # was available, so CPU runs failed with NameError below.
    model_ft = model.cuda() if use_gpu else model

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    model_ft, best_acc = train_model(model_ft,
                                     criterion,
                                     optimizer_ft,
                                     exp_lr_scheduler,
                                     num_epochs=25)

    torch.save(model_ft.state_dict(), model_output_file)