def main_voc2012():
    global args, best_prec1, use_gpu
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # define dataset
    train_dataset = Voc2012Classification(args.data, 'train')
    val_dataset = Voc2012Classification(args.data, 'test')
    num_classes = 20

    # load model
    model = vgg16_sp(num_classes, pretrained=True)
    print(model)

    criterion = nn.MultiLabelSoftMarginLoss()

    state = {
        'batch_size': args.batch_size,
        'max_epochs': args.epochs,
        'image_size': args.image_size,
        'evaluate': args.evaluate,
        'resume': args.resume,
        'lr': args.lr,
        'momentum': args.momentum,
        'weight_decay': args.weight_decay
    }
    state['difficult_examples'] = True
    state['save_model_path'] = 'logs/voc2012/'

    engine = MultiLabelMAPEngine(state)
    engine.multi_learning(model, criterion, train_dataset, val_dataset)
def main_voc2007():
    global args, best_prec1, use_gpu
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # define dataset
    train_dataset = Voc2007Classification(args.data, 'trainval')
    val_dataset = Voc2007Classification(args.data, 'test')
    num_classes = 20

    # load model
    model = vgg16_sp(num_classes, pretrained=True)
    print(model)

    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.SGD(model.get_config_optim(args.lr),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    state = {
        'batch_size': args.batch_size,
        'image_size': args.image_size,
        'max_epochs': args.epochs,
        'evaluate': args.evaluate,
        'resume': args.resume
    }
    state['difficult_examples'] = True
    state['save_model_path'] = 'logs/voc2007/'
    if not os.path.exists(state['save_model_path']):
        os.makedirs(state['save_model_path'])

    engine = MultiLabelMAPEngine(state)
    engine.learning(model, criterion, train_dataset, val_dataset, optimizer)
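# A minimal sketch of the module-level argument parser that main_voc2012() and
# main_voc2007() assume exists. The flag names and defaults below are
# assumptions inferred from the args.* attributes used above, not taken from
# the original source.
import argparse

parser = argparse.ArgumentParser(description='Multi-label training on PASCAL VOC')
parser.add_argument('data', metavar='DIR', help='path to the VOC dataset root')
parser.add_argument('--image-size', dest='image_size', default=224, type=int)
parser.add_argument('--batch-size', dest='batch_size', default=16, type=int)
parser.add_argument('--epochs', default=20, type=int)
parser.add_argument('--lr', default=0.01, type=float)
parser.add_argument('--momentum', default=0.9, type=float)
parser.add_argument('--weight-decay', dest='weight_decay', default=1e-4, type=float)
parser.add_argument('--resume', default='', type=str,
                    help='path to a checkpoint to resume from')
parser.add_argument('--evaluate', dest='evaluate', action='store_true',
                    help='evaluate the model instead of training')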
def load_model_voc(model_path, multiscale=False, scale=224):
    # The checkpoint is a list of dicts, one per training image size; build a
    # mapping from image size to a CUDA copy of the model loaded with the
    # corresponding weights.
    model = models.vgg16_sp(20, False)
    state_dict = torch.load(model_path)
    model_dict = {}
    for sdict in state_dict:
        model.load_state_dict(sdict['state_dict'])
        model_dict[sdict['image_size']] = deepcopy(model).cuda()
    return model_dict if multiscale else {scale: model_dict[scale]}
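# A minimal usage sketch for load_model_voc(). The checkpoint path is a
# hypothetical placeholder; the function returns a dict mapping image size to
# a CUDA model, either for every size stored in the checkpoint (multiscale)
# or only for the single requested scale.
single = load_model_voc('logs/voc2007/model_best.pth.tar', multiscale=False, scale=224)
model_224 = single[224]

multi = load_model_voc('logs/voc2007/model_best.pth.tar', multiscale=True)
for image_size, m in multi.items():
    m.eval()  # switch each scale-specific copy to evaluation mode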
def runObjDetection(data_dir, model_input_location, output_dir_class0,
                    output_dir_class1, output_dir_skip, categoryThreshold):
    # Data augmentation and normalization for training:
    # here we apply the transforms that are normally just for validation
    # to all data, since we are only running inference.
    imageScales = [1000, 750, 500, 250]  # how big the whole image will be, in pixels
    imageMaxScale = max(imageScales)

    class regionCoordinate:
        def __init__(self, x, y, w, h, scale):
            self.x = x
            self.y = y
            self.w = w
            self.h = h
            self.scale = scale

        def __str__(self):
            return "x: {0}, y: {1}, w: {2}, h: {3}, scale: {4}".format(
                self.x, self.y, self.w, self.h, self.scale)

    network_input_width = 250  # width the network expects

    regionCoordinates = [
        regionCoordinate(x, y, network_input_width, network_input_width, s)
        for s in imageScales for x in range(0, s, 250) for y in range(0, s, 250)
    ]

    data_transforms = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    dset = datasets.ImageFolder(data_dir)

    use_gpu = torch.cuda.is_available()

    model = vgg16_sp(3, pretrained=True)
    model.load_state_dict(torch.load(model_input_location))
    #for param in model.parameters():
    #    param.requires_grad = False
    # if model_input_location is empty, use default weights
    model = model.cuda()

    # take in the bounds from selective search (left, top, width, height)
    # and crop the image using them
    def cropImageUsingBounds(image, coordinates):
        # cropCoordinatesPillow is in format (left, upper, right, lower),
        # which Pillow's crop wants
        cropCoordinatesPillow = (coordinates.x, coordinates.y,
                                 coordinates.x + coordinates.w,
                                 coordinates.y + coordinates.h)
        return image.crop(cropCoordinatesPillow)

    outputs = []
    i = 0
    numPoints = str(len(dset))
    numSkipped = 0
    numClass0Right = 0
    numClass0Wrong = 0
    numClass1Right = 0
    numClass1Wrong = 0

    def printCurrentStats():
        print("percent right: " + str((numClass0Right + numClass1Right) / i))
        print("percent wrong: " + str((numClass0Wrong + numClass1Wrong) / i))
        print("percent skipped: " + str(numSkipped / i))
        print("class 0 right: " + str(numClass0Right))
        print("class 0 wrong: " + str(numClass0Wrong))
        print("class 1 right: " + str(numClass1Right))
        print("class 1 wrong: " + str(numClass1Wrong))
        print("skipped: " + str(numSkipped))

    os.system("mkdir -p " + output_dir_class0 + "/right/")
    os.system("mkdir -p " + output_dir_class0 + "/wrong/")
    os.system("mkdir -p " + output_dir_class0 + "/heatmap/")
    os.system("mkdir -p " + output_dir_class1 + "/right/")
    os.system("mkdir -p " + output_dir_class1 + "/wrong/")
    os.system("mkdir -p " + output_dir_class1 + "/heatmap/")
    os.system("mkdir -p " + output_dir_skip)

    for dataPoint in dset:
        pil_image_unresized, labelIndex = dataPoint
        pil_image = pil_image_unresized.resize((imageMaxScale, imageMaxScale))
        print("Working on element " + str(i) + " of " + numPoints, flush=True)
        # based on the __getitem__ implementation of datasets.ImageFolder,
        # the imgs index matches that of the iterated items
        fileName = os.path.basename(dset.imgs[i][0])
        fileFullPath = dset.imgs[i][0]
        fileNameWithoutExt = os.path.splitext(fileName)[0]
        i += 1

        imageRegions = []
        imageRegionsAsTensors = []
        for regionCoordinate in regionCoordinates:
            croppedImage = cropImageUsingBounds(pil_image, regionCoordinate)
            imageRegions.append(croppedImage)
            imageAsTensorForEval = data_transforms(croppedImage)
            imageRegionsAsTensors.append(imageAsTensorForEval)
        imageRegionsAsOneTensor = torch.stack(imageRegionsAsTensors)

        # wrap them in Variable
        if use_gpu:
            inputs = Variable(imageRegionsAsOneTensor.cuda())
        else:
            inputs = Variable(imageRegionsAsOneTensor)

        classProbabilityTensor = F.softmax(model(inputs)).data

        # Take the region with the max probability of being the desired class.
        # max(0) gives the values and indices of the max over the first
        # dimension of the tensor, i.e. the max probability of being in each
        # category.
        mostLikely = classProbabilityTensor.max(0)
        # In mostLikely, the first [0] gives the probabilities of the boxes
        # that are most likely to be in each class; the second [0] gives the
        # probability of the element with the max probability of being the
        # first class (0-indexed). Skip the image if both max probabilities
        # are below the threshold.
        print("max probabilities are " + str(mostLikely[0]))
        if mostLikely[0][0] < categoryThreshold and mostLikely[0][1] < categoryThreshold:
            print("dropping image " + fileName +
                  " as probabilities were all less than " + str(categoryThreshold))
            #makeAndSaveToFileCamClassificationHeatmap(model_input_location, fileFullPath,
            #    output_dir_skip + "/" + fileNameWithoutExt + "_0.jpg", label_map, 0)
            #makeAndSaveToFileCamClassificationHeatmap(model_input_location, fileFullPath,
            #    output_dir_skip + "/" + fileNameWithoutExt + "_1.jpg", label_map, 1)
            numSkipped += 1

        # write to the folder for class 0 or 1 depending on which is most likely;
        # if likely to be in both classes, write to both
        if mostLikely[0][0] > categoryThreshold:
            print("think image " + fileName +
                  " is class 0 as most likely object was: " + str(mostLikely[0]))
            # [1] gives the indices instead of the probabilities
            indexOfMostLikely = classProbabilityTensor.max(0)[1][0]
            pil_image.save(output_dir_class0 + "/" + fileName)
            # make the cam heatmap for this class
            #makeAndSaveToFileCamClassificationHeatmap(model_input_location, output_dir_class0 + "/" + fileName,
            #    output_dir_class0 + "/heatmap/" + fileName, label_map, 0)
            if labelIndex == 0:
                numClass0Right += 1
                imageRegions[indexOfMostLikely].save(output_dir_class0 + "/right/" + fileName)
            else:
                numClass0Wrong += 1
                imageRegions[indexOfMostLikely].save(output_dir_class0 + "/wrong/" + fileName)

        if mostLikely[0][1] > categoryThreshold:
            print("think image " + fileName +
                  " is class 1 as most likely object was: " + str(mostLikely[0]))
            # [1] gives the indices instead of the probabilities
            indexOfMostLikely = classProbabilityTensor.max(0)[1][1]
            pil_image.save(output_dir_class1 + "/" + fileName)
            #makeAndSaveToFileCamClassificationHeatmap(model_input_location, output_dir_class1 + "/" + fileName,
            #    output_dir_class1 + "/heatmap/" + fileName, label_map, 1)
            if labelIndex == 1:
                numClass1Right += 1
                imageRegions[indexOfMostLikely].save(output_dir_class1 + "/right/" + fileName)
            else:
                numClass1Wrong += 1
                imageRegions[indexOfMostLikely].save(output_dir_class1 + "/wrong/" + fileName)

        printCurrentStats()
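# A minimal usage sketch for runObjDetection(). All paths below are
# hypothetical placeholders: data_dir is expected to be an ImageFolder-style
# tree with one sub-directory per class, model_input_location points at saved
# vgg16_sp weights, and categoryThreshold is the softmax probability a region
# must exceed before the image is assigned to a class (0.9 matches the
# threshold mentioned in the comments above).
runObjDetection(data_dir='data/detection/val',
                model_input_location='models/vgg16_sp_finetuned.pth',
                output_dir_class0='results/class0',
                output_dir_class1='results/class1',
                output_dir_skip='results/skipped',
                categoryThreshold=0.9)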
def transferLearn(data_dir, model_checkpoint_location, label_map_file,
                  model_output_file):
    # Data augmentation and normalization for training
    # Just normalization for validation
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            torchsample.transforms.RandomAffine(90, (0.5, 0.5), 90),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    dsets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }

    # https://discuss.pytorch.org/t/how-to-enable-the-dataloader-to-sample-from-each-class-with-equal-probability/911 - if I need stratified sampling
    # https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
    def make_weights_for_balanced_classes(images, nclasses):
        count = [0] * nclasses
        for item in images:
            count[item[1]] += 1
        weight_per_class = [0.] * nclasses
        N = float(sum(count))
        for i in range(nclasses):
            weight_per_class[i] = N / float(count[i])
        weight = [0] * len(images)
        for idx, val in enumerate(images):
            weight[idx] = weight_per_class[val[1]]
        return weight

    weights = make_weights_for_balanced_classes(dsets['train'].imgs,
                                                len(dsets['train'].classes))
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))

    dset_loaders = {}
    dset_loaders['train'] = torch.utils.data.DataLoader(dsets['train'],
                                                        batch_size=15,
                                                        shuffle=False,
                                                        sampler=sampler,
                                                        num_workers=5,
                                                        pin_memory=True)
    dset_loaders['val'] = torch.utils.data.DataLoader(dsets['val'],
                                                      batch_size=15,
                                                      shuffle=True,
                                                      num_workers=5,
                                                      pin_memory=True)
    #dset_loaders2 = {x: torch.utils.data.DataLoader(dsets[x], batch_size=15,
    #                                                shuffle=False, num_workers=4)
    #                 for x in ['train', 'val']}

    dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
    dset_classes = dsets['train'].classes
    with open(label_map_file, 'a') as f:
        f.write(str([(i, c) for i, c in enumerate(dset_classes)]))
    print("Classes to index mapping is " +
          str([(i, c) for i, c in enumerate(dset_classes)]))

    use_gpu = torch.cuda.is_available()

    # Get a batch of training data
    inputs, classes = next(iter(dset_loaders['train']))

    # Make a grid from batch
    out = torchvision.utils.make_grid(inputs)

    def classIndexToProbability(classIdx, class_to_idx_map):
        idx_to_class = {v: k for k, v in class_to_idx_map.items()}
        return [float(x) for x in idx_to_class[classIdx].split(",")]

    def train_model(model, criterion, optimizer, lr_scheduler, num_epochs):
        since = time.time()

        best_model = model
        best_acc = 0.0
        #final_layer_weights_last_iteration = model.fc.weight.clone()

        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    optimizer = lr_scheduler(optimizer, epoch)
                    model.train(True)  # Set model to training mode
                else:
                    model.train(False)  # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for data in dset_loaders[phase]:
                    # get the inputs
                    inputs, labels = data

                    # wrap them in Variable
                    if use_gpu:
                        inputs = Variable(inputs.cuda())
                        labels = Variable(labels.cuda())
                    else:
                        inputs, labels = Variable(inputs), Variable(labels)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # statistics
                    running_loss += loss.data[0]
                    running_corrects += torch.sum(preds == labels.data)

                epoch_loss = running_loss / dset_sizes[phase]
                epoch_acc = running_corrects / dset_sizes[phase]

                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc))

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model = copy.deepcopy(model)

            #print("last layer weights:")
            #print(model.fc.weight)
            #print('{:.7f}: sum of abs of difference in weights'.format(
            #    (final_layer_weights_last_iteration - model.fc.weight).abs().sum().data[0]))
            #final_layer_weights_last_iteration = model.fc.weight.clone()
            #print()

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Best val Acc: {:4f}'.format(best_acc))
        return (best_model, best_acc)

    def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
        """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs."""
        lr = init_lr * (0.1**(epoch // lr_decay_epoch))

        if epoch % lr_decay_epoch == 0:
            print('LR is set to {}'.format(lr))

        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        return optimizer

    model = vgg16_sp(20, pretrained=True)
    checkpoint = torch.load(model_checkpoint_location)
    model.load_state_dict(checkpoint['state_dict'])
    #for param in model_ft.parameters():
    #    param.requires_grad = False
    num_maps = 1024
    model.classifier = nn.Sequential(nn.Dropout(0.5),
                                     nn.Linear(num_maps, len(dset_classes)))

    if use_gpu:
        model_ft = model.cuda()
    else:
        model_ft = model

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    model_ft, best_acc = train_model(model_ft, criterion, optimizer_ft,
                                     exp_lr_scheduler, num_epochs=25)
    torch.save(model_ft.state_dict(), model_output_file)
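# A minimal usage sketch for transferLearn(). The paths below are hypothetical
# placeholders: data_dir must contain 'train' and 'val' ImageFolder
# sub-directories, model_checkpoint_location is a checkpoint produced by the
# VOC training above (a dict with a 'state_dict' entry), and the label map and
# fine-tuned weights are written to the two output files.
transferLearn(data_dir='data/transfer',
              model_checkpoint_location='logs/voc2007/model_best.pth.tar',
              label_map_file='logs/transfer/label_map.txt',
              model_output_file='logs/transfer/vgg16_sp_finetuned.pth')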