def __init__(self, parent_model, save_model_dir, gpu_id, time_budget, save_result_dir=None):
    """Build the OSVOS network and read the parent checkpoint from disk.

    Args:
        parent_model: Checkpoint file name, resolved against
            ``Path.models_dir()``.
        save_model_dir: Kept on the instance as ``save_model_dir``.
        gpu_id: CUDA device index. When it is ``>= 0`` the device is
            selected and the network is moved to it; otherwise the
            network is left where ``vo.OSVOS`` created it.
        time_budget: Kept on the instance as ``time_budget``.
        save_result_dir: Optional; kept on the instance as
            ``save_res_dir``.
    """
    # Values handed in by the caller.
    self.parent_model = parent_model
    self.save_model_dir = save_model_dir
    self.save_res_dir = save_result_dir
    self.gpu_id = gpu_id
    self.time_budget = time_budget

    # Fixed settings.
    # NOTE(review): meanval looks like a per-channel image mean -- confirm
    # the channel order against the code that consumes it.
    self.meanval = (104.00699, 116.66877, 122.67892)
    self.train_batch = 1
    self.test_batch = 1
    self.prev_models = {}

    # Network is created without pretrained weights (pretrained=0); the
    # parent weights are only loaded into ``parent_model_state`` below.
    self.net = vo.OSVOS(pretrained=0)
    if gpu_id >= 0:
        torch.cuda.set_device(device=gpu_id)
        self.net.cuda()

    # The identity map_location returns every storage unchanged, so the
    # checkpoint tensors are not moved to the device they were saved from.
    parent_checkpoint = os.path.join(Path.models_dir(), self.parent_model)
    self.parent_model_state = torch.load(
        parent_checkpoint,
        map_location=lambda storage, loc: storage)
parentEpoch = 240
parentModelName = 'parent'
seed = 0

# Parameters in p are used for the name of the model
p = {
    'trainBatch': 1,  # Number of images in each mini-batch
}

# GPU index to use; the CPU is chosen only when CUDA is unavailable.
gpu_id = 0
if torch.cuda.is_available():
    device = torch.device("cuda:" + str(gpu_id))
else:
    device = torch.device("cpu")

# Network definition: start without pretrained weights, then restore the
# parent checkpoint saved for epoch index (parentEpoch - 1).
net = vo.OSVOS(pretrained=0)
parent_ckpt_name = parentModelName + '_epoch-' + str(parentEpoch - 1) + '.pth'
net.load_state_dict(
    torch.load(os.path.join(save_dir, parent_ckpt_name),
               map_location=lambda storage, loc: storage))

# Logging into Tensorboard: one run directory per timestamp/host/sequence.
run_stamp = datetime.now().strftime('%b%d_%H-%M-%S')
run_name = run_stamp + '_' + socket.gethostname() + '-' + seq_name
log_dir = os.path.join(save_dir, 'runs', run_name)
writer = SummaryWriter(log_dir=log_dir)

net.to(device)  # PyTorch 0.4.0 style

# Visualize the network
testBatch = 1  # Testing batch size
nTestInterval = 5  # Run on test set every nTestInterval epochs
db_root_dir = Path.db_root_dir()
vis_net = 0  # Visualize the network?
snapshot = 40  # Store a model every snapshot epochs
nAveGrad = 10
load_caffe_vgg = True

# Create the output directory. exist_ok avoids the check-then-create race
# when several runs start at the same time.
save_dir = Path.save_root_dir()
os.makedirs(save_dir, exist_ok=True)

# Network definition
modelName = 'parent'
if resume_epoch == 0:
    # Fresh start: the pretrained flag selects the initial weights.
    # NOTE(review): presumably 2 = Caffe-converted VGG and 1 = another
    # pretrained VGG -- confirm against vo.OSVOS.
    net = vo.OSVOS(pretrained=2 if load_caffe_vgg else 1)
else:
    # Resume: build the bare network and restore the checkpoint written for
    # epoch index (resume_epoch - 1). The path is built once so the printed
    # path and the loaded path cannot drift apart.
    net = vo.OSVOS(pretrained=0)
    resume_path = os.path.join(
        save_dir, modelName + '_epoch-' + str(resume_epoch - 1) + '.pth')
    print("Updating weights from: {}".format(resume_path))
    net.load_state_dict(
        torch.load(resume_path, map_location=lambda storage, loc: storage))

# Logging into Tensorboard: one run directory per timestamp/host.
log_dir = os.path.join(
    save_dir, 'runs',
    datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
writer = SummaryWriter(log_dir=log_dir, comment='-parent')
import numpy as np
import cv2
import torch
import networks.vgg_osvos as vo

# Frames fed to the network together, and the name used for this pair.
files = [
    "data/DAVIS/JPEGImages/480p/frames/00000.jpg",
    "data/DAVIS/JPEGImages/480p/frames/00001.jpg"
]
fname = '00000+00001'

device = torch.device("cpu")

# Build the network in eval mode and restore the parent checkpoint.
net = vo.OSVOS().eval()
net.load_state_dict(
    torch.load('models/parent_epoch-239.pth',
               map_location=lambda storage, loc: storage))
net.to(device)

with torch.no_grad():
    # cv2.imread returns None instead of raising when a file is missing or
    # unreadable; np.array(..., dtype=float32) would silently turn that into
    # NaN, so fail early with the offending path.
    imgs = []
    for frame_path in files:
        frame = cv2.imread(frame_path)
        if frame is None:
            raise FileNotFoundError(
                "Could not read image: {}".format(frame_path))
        imgs.append(frame)

    # Stack the frames into one float32 array and subtract the per-channel
    # mean by broadcasting over the last axis.
    mean_values = np.array((104.00699, 116.66877, 122.67892),
                           dtype=np.float32)
    imgs = np.array(imgs, dtype=np.float32) - mean_values

    # Move the channel axis to position 1, as the transpose order states.
    imgs = imgs.transpose((0, 3, 1, 2))
    imgs = torch.tensor(imgs)
    inputs = imgs.to(device)
print(" - Online training snapshots every:", epochs_snap)

parentModelName = 'parent'
seed = 0

# Parameters in p are used for the name of the model
p = {
    'trainBatch': args.batch_size,  # Number of images in each mini-batch
}

# GPU index to use; the CPU is chosen only when CUDA is unavailable.
gpu_id = 0
if torch.cuda.is_available():
    device = torch.device("cuda:" + str(gpu_id))
else:
    device = torch.device("cpu")

# Network definition: start without pretrained weights, then restore the
# parent checkpoint saved for epoch index (parentEpoch - 1).
net = vo.OSVOS(pretrained=0, models_dir=models_dir)
parent_ckpt_name = parentModelName + '_epoch-' + str(parentEpoch - 1) + '.pth'
net.load_state_dict(
    torch.load(os.path.join(models_dir, parent_ckpt_name),
               map_location=lambda storage, loc: storage))

# Logging into Tensorboard: one run directory per timestamp/host/sequence.
run_stamp = datetime.now().strftime('%b%d_%H-%M-%S')
run_name = run_stamp + '_' + socket.gethostname() + '-' + seq_name
log_dir = os.path.join(models_dir, 'runs', run_name)
writer = SummaryWriter(log_dir=log_dir)

net.to(device)  # PyTorch 0.4.0 style
def train(epochs_wo_avegrad):
    """Fine-tune the parent OSVOS network on one sequence and save predictions.

    The sequence name is read from the ``SEQ_NAME`` environment variable
    (``'blackswan'`` when unset). The parent checkpoint is loaded from the
    save directory, trained with gradients accumulated over ``nAveGrad``
    forward passes, saved once on the final epoch, and then run over the
    test split of the same sequence. Each prediction is passed through a
    sigmoid and written as a PNG under ``<save_dir>/Results/<seq_name>``.

    Args:
        epochs_wo_avegrad: Multiplier for the epoch count; the loop runs
            ``epochs_wo_avegrad * nAveGrad`` epochs (``nAveGrad`` is 5).

    Returns:
        None.
    """
    # Setting of parameters
    seq_name = os.environ.get('SEQ_NAME', 'blackswan')

    # ``Path`` here is the project's path helper, not ``pathlib.Path``.
    db_root_dir = Path.db_root_dir()
    save_dir = Path.save_root_dir()
    # exist_ok avoids the check-then-create race between concurrent runs.
    os.makedirs(save_dir, exist_ok=True)

    vis_net = 0  # Visualize the network?
    vis_res = 0  # Visualize the results?
    nAveGrad = 5  # Average the gradient every nAveGrad iterations
    nEpochs = epochs_wo_avegrad * nAveGrad  # Number of epochs for training
    snapshot = nEpochs  # Store a model every snapshot epochs
    # Log about 20 times per run. Clamped to 1 because nEpochs // 20 is 0
    # for nEpochs < 20, which made the modulo below divide by zero.
    log_every = max(nEpochs // 20, 1)
    parentEpoch = 240

    # Parameters in p are used for the name of the model
    p = {
        'trainBatch': 1,  # Number of images in each mini-batch
    }
    seed = 0

    parentModelName = 'parent'

    # GPU index to use; the CPU is chosen only when CUDA is unavailable.
    gpu_id = 0
    device = torch.device(
        "cuda:" + str(gpu_id) if torch.cuda.is_available() else "cpu")

    # Network definition: bare network plus the parent checkpoint saved for
    # epoch index (parentEpoch - 1).
    net = vo.OSVOS(pretrained=0)
    net.load_state_dict(
        torch.load(os.path.join(
            save_dir,
            parentModelName + '_epoch-' + str(parentEpoch - 1) + '.pth'),
                   map_location=lambda storage, loc: storage))

    # Logging into Tensorboard. The directory is passed positionally because
    # the keyword is spelled ``log_dir`` or ``logdir`` depending on the
    # SummaryWriter implementation/version in use.
    log_dir = os.path.join(
        save_dir, 'runs',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' +
        socket.gethostname() + '-' + seq_name)
    writer = SummaryWriter(log_dir)

    net.to(device)  # PyTorch 0.4.0 style

    # Visualize the network
    if vis_net:
        x = torch.randn(1, 3, 480, 854)
        x.requires_grad_()
        x = x.to(device)
        y = net.forward(x)
        g = viz.make_dot(y, net.state_dict())
        g.view()

    # Use the following optimizer. Weights get weight decay, biases get twice
    # the base learning rate, the upscale layers are frozen (lr 0) and the
    # fuse layer trains at 1/100 of the base rate.
    lr = 1e-8
    wd = 0.0002
    optimizer = optim.SGD([
        {
            'params': [
                pr[1] for pr in net.stages.named_parameters()
                if 'weight' in pr[0]
            ],
            'weight_decay': wd
        },
        {
            'params': [
                pr[1] for pr in net.stages.named_parameters()
                if 'bias' in pr[0]
            ],
            'lr': lr * 2
        },
        {
            'params': [
                pr[1] for pr in net.side_prep.named_parameters()
                if 'weight' in pr[0]
            ],
            'weight_decay': wd
        },
        {
            'params': [
                pr[1] for pr in net.side_prep.named_parameters()
                if 'bias' in pr[0]
            ],
            'lr': lr * 2
        },
        {
            'params': [
                pr[1] for pr in net.upscale.named_parameters()
                if 'weight' in pr[0]
            ],
            'lr': 0
        },
        {
            'params': [
                pr[1] for pr in net.upscale_.named_parameters()
                if 'weight' in pr[0]
            ],
            'lr': 0
        },
        {
            'params': net.fuse.weight,
            'lr': lr / 100,
            'weight_decay': wd
        },
        {
            'params': net.fuse.bias,
            'lr': 2 * lr / 100
        },
    ], lr=lr, momentum=0.9)

    # Preparation of the data loaders
    # Define augmentation transformations as a composition
    composed_transforms = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.ScaleNRotate(rots=(-30, 30), scales=(.75, 1.25)),
        tr.ToTensor()
    ])

    # Training dataset and its iterator
    db_train = db.DAVIS2016(train=True,
                            db_root_dir=db_root_dir,
                            transform=composed_transforms,
                            seq_name=seq_name)
    trainloader = DataLoader(db_train,
                             batch_size=p['trainBatch'],
                             shuffle=True,
                             num_workers=1)

    # Testing dataset and its iterator
    db_test = db.DAVIS2016(train=False,
                           db_root_dir=db_root_dir,
                           transform=tr.ToTensor(),
                           seq_name=seq_name)
    testloader = DataLoader(db_test,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1)

    num_img_tr = len(trainloader)
    loss_tr = []
    aveGrad = 0

    print("Start of Online Training, sequence: " + seq_name)
    start_time = timeit.default_timer()

    # Main Training and Testing Loop
    for epoch in range(0, nEpochs):
        # One training epoch
        running_loss_tr = 0
        np.random.seed(seed + epoch)
        for ii, sample_batched in enumerate(trainloader):
            inputs, gts = sample_batched['image'], sample_batched['gt']

            # Forward-Backward of the mini-batch
            inputs.requires_grad_()
            inputs, gts = inputs.to(device), gts.to(device)

            outputs = net.forward(inputs)

            # Compute the fuse loss
            loss = class_balanced_cross_entropy_loss(outputs[-1],
                                                     gts,
                                                     size_average=False)
            running_loss_tr += loss.item()  # PyTorch 0.4.0 style

            # Report the epoch loss once, after the last batch of a logging
            # epoch. The mean goes into a separate variable so the running
            # sum is never rescaled mid-epoch.
            if (ii == num_img_tr - 1
                    and epoch % log_every == log_every - 1):
                epoch_loss = running_loss_tr / num_img_tr
                loss_tr.append(epoch_loss)

                print('[Epoch: %d, numImages: %5d]' % (epoch + 1, ii + 1))
                print('Loss: %f' % epoch_loss)
                writer.add_scalar('data/total_loss_epoch', epoch_loss, epoch)

            # Backward the averaged gradient
            loss /= nAveGrad
            loss.backward()
            aveGrad += 1

            # Update the weights once in nAveGrad forward passes
            if aveGrad % nAveGrad == 0:
                writer.add_scalar('data/total_loss_iter', loss.item(),
                                  ii + num_img_tr * epoch)
                optimizer.step()
                optimizer.zero_grad()
                aveGrad = 0

        # Save the model (snapshot == nEpochs, so this fires on the final
        # epoch only).
        if (epoch % snapshot) == snapshot - 1 and epoch != 0:
            torch.save(
                net.state_dict(),
                os.path.join(save_dir,
                             seq_name + '_epoch-' + str(epoch) + '.pth'))

    stop_time = timeit.default_timer()
    print('Online training time: ' + str(stop_time - start_time))

    # Testing Phase
    if vis_res:
        # Imported lazily so matplotlib is only needed when visualizing.
        import matplotlib.pyplot as plt
        plt.close("all")
        plt.ion()
        _, ax_arr = plt.subplots(1, 3)

    save_dir_res = os.path.join(save_dir, 'Results', seq_name)
    os.makedirs(save_dir_res, exist_ok=True)

    print('Testing Network')
    with torch.no_grad():  # PyTorch 0.4.0 style
        # Main Testing Loop
        for ii, sample_batched in enumerate(testloader):
            img, gt, fname = (sample_batched['image'], sample_batched['gt'],
                              sample_batched['fname'])

            # Forward of the mini-batch. The ground truth stays on the CPU:
            # it is only read back through ``gt.numpy()`` for plotting.
            inputs = img.to(device)

            outputs = net.forward(inputs)

            for jj in range(int(inputs.size()[0])):
                # Channel axis moved last, then logits -> probabilities.
                pred = np.transpose(
                    outputs[-1].cpu().data.numpy()[jj, :, :, :], (1, 2, 0))
                pred = 1 / (1 + np.exp(-pred))
                pred = np.squeeze(pred)

                # Save the result, attention to the index jj
                # NOTE(review): if ``sm`` is scipy.misc, ``imsave`` is absent
                # from recent SciPy releases -- confirm the pinned version.
                sm.imsave(
                    os.path.join(save_dir_res,
                                 os.path.basename(fname[jj]) + '.png'), pred)

                if vis_res:
                    img_ = np.transpose(img.numpy()[jj, :, :, :], (1, 2, 0))
                    gt_ = np.transpose(gt.numpy()[jj, :, :, :], (1, 2, 0))
                    # Squeeze the per-sample array; squeezing ``gt`` threw
                    # away the transpose and used the whole batch tensor.
                    gt_ = np.squeeze(gt_)

                    # Plot the particular example
                    ax_arr[0].cla()
                    ax_arr[1].cla()
                    ax_arr[2].cla()
                    ax_arr[0].set_title('Input Image')
                    ax_arr[1].set_title('Ground Truth')
                    ax_arr[2].set_title('Detection')
                    ax_arr[0].imshow(im_normalize(img_))
                    ax_arr[1].imshow(gt_)
                    ax_arr[2].imshow(im_normalize(pred))
                    plt.pause(0.001)

    writer.close()