def run(self):
    """Export the checkpoint at ``self.input`` to an ONNX file.

    Rebuilds the architecture named in the checkpoint, reshapes its output
    to ``checkpoint['num_classes']``, loads the trained weights, optionally
    appends a softmax layer (unless ``self.no_softmax`` is set) and exports
    the result with ``torch.onnx.export``.

    The output file is named ``<arch>.onnx`` and is placed in
    ``self.model_dir`` when that attribute is set. The final path is stored
    in ``self.output`` and ``self.signalEndExport`` is emitted once the
    export has finished.
    """
    # set the device
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('running on device ' + str(device))

    # load the model checkpoint; map_location remaps tensors that were saved
    # on a GPU so the checkpoint also loads when falling back to the CPU
    print('loading checkpoint: ' + self.input)
    checkpoint = torch.load(self.input, map_location=device)
    arch = checkpoint['arch']

    # create the model architecture
    print('using model: ' + arch)
    model = models.__dict__[arch](pretrained=True)
    sleep(0.01)  # NOTE(review): presumably yields to another thread - confirm

    # reshape the model's output
    model = reshape_model(model, arch, checkpoint['num_classes'])

    # load the model weights
    model.load_state_dict(checkpoint['state_dict'])

    # add softmax layer
    if not self.no_softmax:
        print('adding nn.Softmax layer to model...')
        model = torch.nn.Sequential(model, torch.nn.Softmax(1))

    model.to(device)
    model.eval()

    print(model)
    sleep(0.01)

    # create example image data on the same device as the model
    # (an unconditional .cuda() would fail on CPU-only machines)
    resolution = checkpoint['resolution']
    example_input = torch.ones((1, 3, resolution, resolution), device=device)
    print('input size: {:d}x{:d}'.format(resolution, resolution))

    # format output model path
    self.output = arch + '.onnx'

    has_separator = '/' in self.output or '\\' in self.output
    if self.model_dir and not has_separator:
        self.output = os.path.join(self.model_dir, self.output)

    # export the model
    input_names = ["input_0"]
    output_names = ["output_0"]

    print('exporting model to ONNX...')
    torch.onnx.export(model, example_input, self.output, verbose=True,
                      input_names=input_names, output_names=output_names)
    print('model exported to: {:s}'.format(self.output))

    self.signalEndExport.emit()
def main_worker(gpu, ngpus_per_node, args):
    """Train (or only evaluate) an image classifier in one worker process.

    Args:
        gpu: GPU index for this worker, or ``None``; stored in ``args.gpu``.
        ngpus_per_node: number of GPUs per node, used to derive the global
            rank and the per-process batch size / worker count.
        args: options namespace. It is read and also updated in place
            (``gpu``, ``rank``, ``batch_size``, ``workers``, ``start_epoch``).

    Side effects:
        Updates the module-level ``best_acc1`` and calls ``save_checkpoint``
        after every epoch on the process selected by the rank check below.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    # datasets (created first because the class count is needed to reshape the model)
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(args.resolution),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    num_classes = len(train_dataset.classes)
    print('=> dataset classes:  ' + str(num_classes) + ' ' + str(train_dataset.classes))

    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(args.resolution),
            transforms.ToTensor(),
            normalize,
        ]))

    # create or load the model if using pre-trained (the default)
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    # reshape the model for the number of classes in the dataset
    model = reshape_model(model, args.arch, num_classes)

    # transfer the model to the GPU that it should be run on
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # The loaders are built only now so that they pick up the per-process
    # batch size and worker count computed above; building them earlier made
    # that adjustment a no-op.
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # map the checkpoint onto this process's GPU instead of the
                # device it happened to be saved from
                checkpoint = torch.load(args.resume,
                                        map_location='cuda:{}'.format(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None and torch.is_tensor(best_acc1):
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # if in evaluation mode, only run validation
    if args.evaluate:
        validate(val_loader, model, criterion, num_classes, args)
        return

    # train for the specified number of epochs
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # decay the learning rate
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, num_classes, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, num_classes, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # with multiprocessing, only the first process of each node saves
        if not args.multiprocessing_distributed or args.rank % ngpus_per_node == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'resolution': args.resolution,
                'num_classes': num_classes,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best, args)
# set the device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('running on device ' + str(device))

# load the model checkpoint; map_location remaps tensors that were saved on a
# GPU so the checkpoint also loads when falling back to the CPU
print('loading checkpoint:  ' + opt.input)
checkpoint = torch.load(opt.input, map_location=device)
arch = checkpoint['arch']

# create the model architecture
print('using model:  ' + arch)
model = models.__dict__[arch](pretrained=True)

# reshape the model's output
model = reshape_model(model, arch, checkpoint['output_dims'])

# load the model weights
model.load_state_dict(checkpoint['state_dict'])

# add softmax layer
if not opt.no_softmax:
    print('adding nn.Softmax layer to model...')
    model = torch.nn.Sequential(model, torch.nn.Softmax(1))

model.to(device)
model.eval()

print(model)

# create example image data
# set the device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('running on device ' + str(device))

# load the model checkpoint; map_location remaps tensors that were saved on a
# GPU so the checkpoint also loads when falling back to the CPU
print('loading checkpoint:  ' + opt.input)
checkpoint = torch.load(opt.input, map_location=device)
arch = checkpoint['arch']

# create the model architecture
print('using model:  ' + arch)
model = models.__dict__[arch](pretrained=True)

# reshape the model's output
model = reshape_model(model, arch, checkpoint['num_classes'])

# load the model weights
model.load_state_dict(checkpoint['state_dict'])

# add softmax layer
if not opt.no_softmax:
    print('adding nn.Softmax layer to model...')
    model = torch.nn.Sequential(model, torch.nn.Softmax(1))

model.to(device)
model.eval()

print(model)

# create example image data
def run(self):
    """Fine-tune a pre-trained classifier on GPU 0 and log to TensorBoard.

    Reads the ``train`` and ``val`` image folders under ``self.data_dir``,
    reshapes the pre-trained ``self.arch`` model to the number of dataset
    classes and trains it with SGD from ``self.start_epoch`` up to
    ``self.epochs``. After each epoch the validation loss, accuracy and a
    predictions figure are written to TensorBoard and a checkpoint is saved
    through ``self.save_checkpoint``. ``self.best_acc1`` is updated along
    the way and ``self.signalEndTraining`` is emitted once training ends.
    """
    self.gpu = 0
    print("Use GPU: {} for training".format(self.gpu))

    # data loading code
    traindir = os.path.join(self.data_dir, 'train')
    valdir = os.path.join(self.data_dir, 'val')
    normalize = transforms.Normalize(mean=self.mean, std=self.std)

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            # ratio=(1.0, 1.0) keeps the crop square so that the image is
            # not distorted when it is resized
            transforms.RandomResizedCrop(self.resolution,
                                         scale=(0.5, 1.0),
                                         ratio=(1.0, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    num_classes = len(train_dataset.classes)
    print('=> dataset classes:  ' + str(num_classes) + ' ' +
          str(train_dataset.classes))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=self.batch_size,
                                               shuffle=True,
                                               num_workers=self.workers,
                                               pin_memory=True,
                                               sampler=None)

    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(self.resolution),
            transforms.ToTensor(),
            normalize,
        ]))

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=self.batch_size,
                                             shuffle=False,
                                             num_workers=self.workers,
                                             pin_memory=True)

    # a single batch holding the whole validation set, used for the figure
    val_loader_all = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=len(val_dataset),
                                                 shuffle=False,
                                                 num_workers=self.workers,
                                                 pin_memory=True)

    # create or load the model if using pre-trained (the default)
    print("=> using pre-trained model '{}'".format(self.arch))
    model = models.__dict__[self.arch](pretrained=True)

    # reshape the model for the number of classes in the dataset
    model_cpu = reshape_model(model, self.arch, num_classes)

    # transfer the model to the GPU that it should be run on
    torch.cuda.set_device(self.gpu)
    model = model_cpu.cuda(self.gpu)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(self.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                self.lr,
                                momentum=self.momentum,
                                weight_decay=self.weight_decay)

    cudnn.benchmark = True

    # default `log_dir` is "runs" - we'll be more specific here
    writer = SummaryWriter('runs/hackathon_AI')
    try:
        # get one batch of training images (next() works on every PyTorch
        # version; the iterator's own .next() method was removed)
        images, labels = next(iter(train_loader))

        # create grid of images
        img_grid = torchvision.utils.make_grid(images)

        # get and show the unnormalized images
        img_grid = self.show_img(img_grid)

        # write to tensorboard
        writer.add_image('hackathon', img_grid)
        # NOTE: writer.add_graph(model_cpu, images) is left out on purpose;
        # it failed with "RuntimeError: Expected object of device type cuda
        # but got device type cpu for argument #1 'self' in call to
        # _thnn_conv2d_forward"

        # train for the specified number of epochs
        for epoch in range(self.start_epoch, self.epochs):
            print('Begin epoch #{}'.format(epoch))
            sleep(0.001)

            # decay the learning rate
            self.adjust_learning_rate(optimizer, epoch)

            # train for one epoch
            self.train(train_loader, model, criterion, optimizer, epoch,
                       num_classes)

            # evaluate on validation set
            acc1, loss, _ret_images, _ret_target = self.validate(
                val_loader, model, criterion, num_classes)

            # save on Tensorboard
            writer.add_scalar('validation loss', loss, epoch)
            writer.add_scalar('validation accuracy', acc1, epoch)

            # ...log a Matplotlib Figure showing the model's predictions on
            # all validation images; the batch must come from the validation
            # loader, not from the training iterator
            images_val, labels_val = next(iter(val_loader_all))
            writer.add_figure('predictions vs. actuals',
                              self.plot_classes_preds(model, images_val,
                                                      labels_val,
                                                      train_dataset.classes),
                              global_step=epoch)

            # remember best acc@1 and save checkpoint
            is_best = acc1 > self.best_acc1
            self.best_acc1 = max(acc1, self.best_acc1)

            # NOTE(review): `rank % 1 == 0` holds for every integer rank, so
            # this always saves - confirm whether that is intended
            if not self.multiprocessing_distributed or self.rank % 1 == 0:
                self.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': self.arch,
                        'resolution': self.resolution,
                        'num_classes': num_classes,
                        'state_dict': model.state_dict(),
                        'best_acc1': self.best_acc1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best)
    finally:
        # flush and close the event file even if training raised
        writer.close()

    self.signalEndTraining.emit()