def main():
    """Train the jigsaw network, evaluating and checkpointing along the way.

    All configuration is read from the module-level ``args`` object
    (``gpu``, ``data_path``, ``classes``, ``batch``, ``cores``,
    ``checkpoint_path``, ``model``, ``lr``, ``checkpoint``, ``iter_start``,
    ``epochs``, ``checkpoint_interval``).

    Weights are restored from ``args.checkpoint_path`` when that path exists;
    otherwise, if ``args.model`` is given, they are loaded from it. Every
    fifth epoch (except epoch 0) ``test`` is run on the validation loader,
    and every ``args.checkpoint_interval`` epochs the network is saved.
    """
    if args.gpu is not None:
        print('Using GPU %d' % args.gpu)
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')

    os.makedirs("checkpoints", exist_ok=True)
    print('Process number: %d' % (os.getpid()))

    # ---- Data loaders ----
    print('Start dataloader')
    # NOTE(review): a '<data_path>_255x255' directory, if present, is NOT used
    # here; both datasets are built from args.data_path. Confirm whether the
    # resized directory was meant to be preferred.
    train_data = JigsawDataset(args.data_path, './jigsaw_train.txt',
                               classes=args.classes)
    train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=args.batch,
                                               shuffle=True,
                                               num_workers=args.cores)

    val_data = JigsawDataset(args.data_path, './jigsaw_val.txt',
                             classes=args.classes)
    val_loader = torch.utils.data.DataLoader(dataset=val_data,
                                             batch_size=args.batch,
                                             shuffle=True,
                                             num_workers=args.cores)
    print('Finish data loader')
    print('Images: train %d, validation %d' % (train_data.N, val_data.N))

    # ---- Network ----
    net = Network(args.classes)
    if args.gpu is not None:
        net.cuda()
    print('Initialize the model')

    # Load from checkpoint if it exists, otherwise from the model file.
    # The model is loaded at most once and never on top of a restored
    # checkpoint.
    if os.path.exists(args.checkpoint_path):
        net.load_state_dict(torch.load(args.checkpoint_path))
        print("Load a checkpoint ", args.checkpoint_path)
    elif args.model is not None:
        net.load(args.model)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                                weight_decay=5e-4)

    # ---- Training ----
    print('Start training: lr %f, batch size %d, classes %d' %
          (args.lr, args.batch, args.classes))
    print('Checkpoint: ' + args.checkpoint)

    steps = args.iter_start
    print(
        "======================================================================"
    )
    for epoch in range(args.epochs):
        print('Epoch {} begins: '.format(epoch))
        t = time()
        if epoch % 5 == 0 and epoch > 0:
            test(net, criterion, val_loader, steps)
        lr = adjust_learning_rate(optimizer, epoch, init_lr=args.lr, step=20,
                                  decay=0.1)

        # Defaults so the end-of-epoch report works even if the loader
        # yields no batches.
        loss = float('nan')
        acc = float('nan')
        for images, labels, _ in train_loader:
            if args.gpu is not None:
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = net(images)

            prec1, prec5 = compute_accuracy(outputs.cpu().data,
                                            labels.cpu().data, topk=(1, 5))
            acc = prec1.item()

            batch_loss = criterion(outputs, labels)
            batch_loss.backward()
            optimizer.step()
            loss = batch_loss.item()

            steps += 1

        end = time()
        # Reports the metrics of the LAST batch of the epoch, not an average.
        print('Epoch {}/{} LR = {}, Loss = {}, Accuracy = {}, Time taken = {}'.
              format(epoch + 1, args.epochs, lr, loss, acc, end - t))

        if epoch % args.checkpoint_interval == 0:
            # NOTE(review): plain concatenation, so args.checkpoint is
            # presumably expected to end with a path separator; verify.
            filename = args.checkpoint + 'jigsaw7_epoch_{}.pth'.format(epoch)
            net.save(filename)
def main():
    """Run jigsaw training (or a single evaluation pass) driven by ``args``.

    Builds the train/validation loaders, creates the network, resumes from
    the lexicographically last ``*pth*`` file in ``args.checkpoint`` when one
    exists (otherwise loads ``args.model`` if given), and then either
    evaluates once (``args.evaluate``) or trains until ``args.epochs``.

    During training, metrics are printed and logged every 20 steps, a
    checkpoint is written every 1000 steps, and the loop stops after the
    current epoch if ``<args.checkpoint>/stop.txt`` exists.
    """
    use_gpu = args.gpu is not None
    if use_gpu:
        print(('Using GPU %d' % args.gpu))
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')
    print('Process number: %d' % (os.getpid()))

    def _prefer_resized(folder):
        # Use the '<folder>_255x255' sibling when it exists on disk.
        resized = folder + '_255x255'
        return resized if os.path.exists(resized) else folder

    # ---- Data loaders ----
    train_dir = _prefer_resized(args.data + '/ILSVRC2012_img_train')
    train_data = DataLoader(train_dir, args.data + '/ilsvrc12_train.txt',
                            classes=args.classes)
    train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=args.batch,
                                               shuffle=True,
                                               num_workers=args.cores)

    val_dir = _prefer_resized(args.data + '/ILSVRC2012_img_val')
    val_data = DataLoader(val_dir, args.data + '/ilsvrc12_val.txt',
                          classes=args.classes)
    val_loader = torch.utils.data.DataLoader(dataset=val_data,
                                             batch_size=args.batch,
                                             shuffle=True,
                                             num_workers=args.cores)

    iter_per_epoch = train_data.N / args.batch
    print('Images: train %d, validation %d' % (train_data.N, val_data.N))

    # ---- Network ----
    net = Network(args.classes)
    if use_gpu:
        net.cuda()

    # Resume from the newest checkpoint if there is one; otherwise fall back
    # to the optional pretrained model.
    saved = []
    if os.path.exists(args.checkpoint):
        saved = sorted(name for name in os.listdir(args.checkpoint)
                       if 'pth' in name)
    if saved:
        latest = saved[-1]
        net.load_state_dict(torch.load(args.checkpoint + '/' + latest))
        # The step counter is the last '_'-separated field of the file stem,
        # e.g. 'jps_003_001000.pth.tar' -> 1000.
        args.iter_start = int(latest.split(".")[-3].split("_")[-1])
        print('Starting from: ', latest)
    elif args.model is not None:
        net.load(args.model)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                                weight_decay=5e-4)

    logger = Logger(args.checkpoint + '/train')
    logger_test = Logger(args.checkpoint + '/test')

    # ---- Evaluation only ----
    if args.evaluate:
        test(net, criterion, None, val_loader, 0)
        return

    # ---- Training ----
    print(('Start training: lr %f, batch size %d, classes %d' %
           (args.lr, args.batch, args.classes)))
    print(('Checkpoint: ' + args.checkpoint))

    # Sliding windows holding at most the 100 most recent timings.
    batch_time = []
    net_time = []
    steps = args.iter_start
    first_epoch = int(args.iter_start / iter_per_epoch)
    for epoch in range(first_epoch, args.epochs):
        if epoch > 0 and epoch % 10 == 0:
            test(net, criterion, logger_test, val_loader, steps)
        lr = adjust_learning_rate(optimizer, epoch, init_lr=args.lr, step=20,
                                  decay=0.1)

        end = time()
        for images, labels, original in train_loader:
            # Time spent waiting for this batch to arrive.
            batch_time.append(time() - end)
            if len(batch_time) > 100:
                batch_time.pop(0)

            images = Variable(images)
            labels = Variable(labels)
            if use_gpu:
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            forward_start = time()
            outputs = net(images)
            net_time.append(time() - forward_start)
            if len(net_time) > 100:
                net_time.pop(0)

            prec1, prec5 = compute_accuracy(outputs.cpu().data,
                                            labels.cpu().data, topk=(1, 5))
            acc = prec1[0]

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            loss = float(loss.cpu().data.numpy())

            if steps % 20 == 0:
                print((
                    '[%2d/%2d] %5d) [batch load % 2.3fsec, net %1.2fsec], LR %.5f, Loss: % 1.3f, Accuracy % 2.2f%%'
                    % (epoch + 1, args.epochs, steps, np.mean(batch_time),
                       np.mean(net_time), lr, loss, acc)))

                logger.scalar_summary('accuracy', acc, steps)
                logger.scalar_summary('loss', loss, steps)

                # Log the first sample of each entry in `original`, with
                # every slice min-max scaled before stacking on the last axis.
                first_samples = [entry[0] for entry in original]
                imgs = np.zeros([9, 75, 75, 3])
                for slot, sample in enumerate(first_samples):
                    planes = sample.numpy()
                    scaled = [(p - p.min()) / (p.max() - p.min())
                              for p in planes]
                    imgs[slot] = np.stack(scaled, axis=2)
                logger.image_summary('input', imgs, steps)

            steps += 1

            if steps % 1000 == 0:
                filename = '%s/jps_%03i_%06d.pth.tar' % (args.checkpoint,
                                                        epoch, steps)
                net.save(filename)
                print('Saved: ' + args.checkpoint)

            end = time()

        if os.path.exists(args.checkpoint + '/stop.txt'):
            # break without using CTRL+C
            break
def main():
    """Train the jigsaw network on the ILSVRC2012 training split.

    Configuration comes from the module-level ``args`` object (``gpu``,
    ``data``, ``classes``, ``batch``, ``checkpoint``, ``model``, ``lr``,
    ``iter_start``, ``epochs``).

    If ``args.checkpoint`` exists and contains a file with ``'pth'`` in its
    name, training resumes from the lexicographically last such file and
    ``args.iter_start`` is taken from the file name. Otherwise
    ``args.model`` is loaded when given. Metrics are printed every step and
    logged every 20 steps; a checkpoint is saved every 1000 steps. Training
    stops after the current epoch if ``<args.checkpoint>/stop.txt`` exists.
    """
    if args.gpu is not None:
        print('Using GPU %d' % args.gpu)
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')

    # ---- Data loader ----
    train_data = DataLoader(args.data + '/ILSVRC2012_img_train',
                            args.data + '/ilsvrc12_train.txt',
                            classes=args.classes)
    train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=args.batch,
                                               shuffle=True,
                                               num_workers=16)

    N = train_data.N
    iter_per_epoch = N / args.batch
    print('Images: %d' % (N))

    # ---- Network ----
    net = Network(args.classes)
    if args.gpu is not None:
        net.cuda()

    # Resume from the newest checkpoint if any, else use the optional model.
    files = []
    if os.path.exists(args.checkpoint):
        files = sorted(f for f in os.listdir(args.checkpoint) if 'pth' in f)
    if files:
        ckp = files[-1]
        # os.path.join supplies the separator between directory and file
        # name; plain concatenation produced a non-existent path whenever
        # args.checkpoint had no trailing slash.
        net.load_state_dict(torch.load(os.path.join(args.checkpoint, ckp)))
        # Step counter is the last '_'-separated field of the file stem,
        # e.g. 'jps_003_001000.pth.tar' -> 1000.
        args.iter_start = int(ckp.split(".")[-3].split("_")[-1])
        print('Starting from: ', ckp)
    elif args.model is not None:
        net.load(args.model)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                                weight_decay=5e-4)

    logger = Logger(args.checkpoint + '/train')
    logger_test = Logger(args.checkpoint + '/test')  # created but not used below

    # ---- Training ----
    print('Start training: lr %f, batch size %d, classes %d' %
          (args.lr, args.batch, args.classes))
    print('Checkpoint: ' + args.checkpoint)

    # Sliding windows holding at most the 100 most recent timings.
    batch_time, net_time = [], []
    steps = args.iter_start
    for epoch in range(int(args.iter_start / iter_per_epoch), args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, init_lr=args.lr, step=20,
                                  decay=0.1)

        # The loader is stepped by hand so the time spent fetching each
        # batch can be measured; one fewer than floor(N / batch) batches are
        # consumed per epoch.
        it = iter(train_loader)
        for i in range(int((float(N) / args.batch)) - 1):
            t = time()
            images, labels, _ = next(it)
            batch_time.append(time() - t)
            if len(batch_time) > 100:
                del batch_time[0]

            images = Variable(images)
            labels = Variable(labels)
            if args.gpu is not None:
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            t = time()
            outputs = net(images)
            net_time.append(time() - t)
            if len(net_time) > 100:
                del net_time[0]

            prec1, prec5 = compute_accuracy(outputs.cpu().data,
                                            labels.cpu().data, topk=(1, 5))
            acc = prec1[0]

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            loss = float(loss.cpu().data.numpy())

            # Progress is reported on every step.
            print(
                '[%2d/%2d] %5d) [batch load % 2.2fsec, net %1.2fsec], LR %.5f, Loss: % 1.3f, Accuracy % 2.1f%%'
                % (epoch + 1, args.epochs, steps, np.mean(batch_time),
                   np.mean(net_time), lr, loss, acc))

            if steps % 20 == 0:
                logger.scalar_summary('accuracy', acc, steps)
                logger.scalar_summary('loss', loss, steps)

            steps += 1

            if steps % 1000 == 0:
                filename = '%s/jps_%03i_%06d.pth.tar' % (args.checkpoint,
                                                        epoch, steps)
                net.save(filename)
                print('Saved: ' + args.checkpoint)

        if os.path.exists(args.checkpoint + '/stop.txt'):
            # break without using CTRL+C
            break
def main():
    """Fine-tune the network for 21-way multi-label classification.

    Configuration comes from the module-level ``args`` object (``gpu``,
    ``pascal_path``, ``batch``, ``crops``, ``model``, ``fc``, ``freeze``,
    ``lr``, ``checkpoint``, ``iter_start``, ``epochs``) and the module-level
    ``CORES`` constant.

    Trains on the ``'trainval'`` split and, every fifth epoch, saves the
    network and runs ``test`` on the ``'test'`` split. Training stops after
    the current epoch if ``<args.checkpoint>/stop.txt`` exists.
    """
    if args.gpu is not None:
        print('Using GPU %d' % args.gpu)
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomSizedCrop(227),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # Validation uses the same random crop/flip pipeline as training.
    val_transform = transforms.Compose([
        transforms.RandomSizedCrop(227),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # ---- Data loaders ----
    train_data = DataLoader(args.pascal_path, 'trainval',
                            transform=train_transform)
    train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=args.batch,
                                               shuffle=True,
                                               num_workers=CORES)

    val_data = DataLoader(args.pascal_path, 'test', transform=val_transform,
                          random_crops=args.crops)
    val_loader = torch.utils.data.DataLoader(dataset=val_data,
                                             batch_size=args.batch,
                                             shuffle=False,
                                             num_workers=CORES)

    N = len(train_data.names)
    iter_per_epoch = N / args.batch

    # ---- Network ----
    net = Network(num_classes=21)
    if args.gpu is not None:
        net.cuda()

    if args.model is not None:
        net.load(args.model, args.fc)

    if args.freeze is not None:
        # Freeze every parameter whose name contains 'conv' or 'features'.
        for name, param in net.named_parameters():
            if 'conv' in name or 'features' in name:
                param.requires_grad = False

    criterion = nn.MultiLabelSoftMarginLoss()
    # Only parameters that still require gradients are optimized.
    trainable = [p for p in net.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable, lr=args.lr, momentum=0.9,
                                weight_decay=0.0001)

    # Create each log directory if it is missing, even when the checkpoint
    # root already exists.
    for sub in ('/train', '/test'):
        log_dir = args.checkpoint + sub
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)

    logger_train = Logger(args.checkpoint + '/train')
    logger_test = Logger(args.checkpoint + '/test')

    # ---- Training ----
    print('Start training: lr %f, batch size %d' % (args.lr, args.batch))
    print('Checkpoint: ' + args.checkpoint)

    steps = args.iter_start
    # Convert the starting step into a starting epoch. Dividing (rather than
    # multiplying) matches the unit of iter_per_epoch, and int() keeps
    # range() valid when the division yields a float.
    start_epoch = int(args.iter_start / iter_per_epoch)
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, init_lr=args.lr, step=80,
                             decay=0.1)

        mAP = []
        for images, labels in train_loader:
            images = Variable(images)
            labels = Variable(labels)
            if args.gpu is not None:
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = net(images)

            mAP.append(compute_mAP(labels.data, outputs.data))

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Plain Python float for formatting and logging.
            loss = float(loss.cpu().data.numpy())

            if steps % 100 == 0:
                # mAP is averaged over the last 20 batches of this epoch.
                print('[%d/%d] %d), Loss: %.3f, mAP %.2f%%' %
                      (epoch + 1, args.epochs, steps, loss,
                       100 * np.mean(mAP[-20:])))

            if steps % 20 == 0:
                logger_train.scalar_summary('mAP', np.mean(mAP[-20:]), steps)
                logger_train.scalar_summary('loss', loss, steps)
                # Move the channel axis last before logging the first
                # 10 images of the batch.
                data = images.cpu().data.numpy().transpose([0, 2, 3, 1])
                logger_train.image_summary('input', data[:10], steps)

            steps += 1

        if epoch % 5 == 0:
            net.save(args.checkpoint, epoch + 1)
            print('Saved: ' + args.checkpoint)
            test(net, criterion, logger_test, val_loader, steps)

        if os.path.exists(args.checkpoint + '/stop.txt'):
            # break without using CTRL+C
            break