def main():
    """Evaluate a trained im2recipe model on the test partition."""
    # Build the joint embedding model; only the vision branch is parallelized.
    model = im2recipe(inst_emb=opts.inst_emb)
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # Loss: cosine similarity between the two embeddings (input1, input2, target).
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).to(device)
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    # Restore trained weights; latin1 encoding handles py2-era checkpoints.
    print("=> loading checkpoint '{}'".format(opts.model_path))
    if device.type == 'cpu':
        checkpoint = torch.load(opts.model_path, encoding='latin1',
                                map_location='cpu')
    else:
        checkpoint = torch.load(opts.model_path, encoding='latin1')
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # ImageNet statistics for input normalization.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Deterministic preprocessing for evaluation: resize + center crop.
    eval_transform = transforms.Compose([
        transforms.Resize(256),      # rescale the image keeping the original aspect ratio
        transforms.CenterCrop(224),  # we get only the center of that rescaled
        transforms.ToTensor(),
        normalize,
    ])
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(opts.img_path,
                     eval_transform,
                     data_path=data_path,
                     sem_reg=opts.semantic_reg,
                     partition='test',
                     n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=True)
    print('Test loader prepared.')

    # run test
    test(test_loader, model, criterion)
def main():
    """Evaluate an im2recipe checkpoint on the 'temp' partition.

    Fixes:
    - ``transforms.Scale`` was deprecated and then removed from torchvision;
      ``transforms.Resize`` is the drop-in replacement.
    - ``torch.load`` now maps the checkpoint to CPU when ``opts.no_cuda`` is
      set, so a GPU-saved checkpoint loads on a CPU-only machine.
    """
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0])
    if not opts.no_cuda:
        model.cuda()

    print("=> loading checkpoint '{}'".format(opts.model_path))
    if opts.no_cuda:
        # Map GPU-saved tensors onto the CPU; without this, loading a CUDA
        # checkpoint on a machine without CUDA raises an error.
        checkpoint = torch.load(opts.model_path, map_location='cpu')
    else:
        checkpoint = torch.load(opts.model_path)
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            sem_reg=opts.semantic_reg,
            partition='temp'),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=(not opts.no_cuda))
    print('Test loader prepared.')

    # run test
    test(test_loader, model)
def main():
    """Evaluate an im2ingr checkpoint on the test partition.

    Fix: ``transforms.Scale`` was deprecated and then removed from
    torchvision; ``transforms.Resize`` is the drop-in replacement.
    """
    # Restrict visible devices before any CUDA context is created.
    gpus = ','.join(map(str, opts.gpu))
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus

    model = im2ingr()
    model.visionMLP = torch.nn.DataParallel(
        model.visionMLP, device_ids=list(range(len(opts.gpu))))
    model.cuda()

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).cuda()
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).cuda()
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    print("=> loading checkpoint '{}'".format(opts.model_path))
    checkpoint = torch.load(opts.model_path)
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            sem_reg=opts.semantic_reg,
            partition='test',
            ingrW2V=opts.ingrW2V),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=True)
    print('Test loader prepared.')

    # run test
    test(test_loader, model, criterion)
def main():
    """Train (and optionally test) the GazeLSTM gaze-estimation model.

    Fixes:
    - ``if test == True:`` replaced with idiomatic truthiness test.
    - Removed the redundant second ``model.cuda()``: ``DataParallel(...).cuda()``
      already moved the model to the GPU.
    """
    global args, best_error

    model_v = GazeLSTM()
    model = torch.nn.DataParallel(model_v).cuda()
    cudnn.benchmark = True

    image_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225])

    # Training: random resized crops for augmentation.
    train_loader = torch.utils.data.DataLoader(
        ImagerLoader(source_path, train_file,
                     transforms.Compose([
                         transforms.RandomResizedCrop(size=224, scale=(0.8, 1)),
                         transforms.ToTensor(),
                         image_normalize,
                     ])),
        batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)

    # Validation: deterministic resize.
    # NOTE(review): shuffle=True on val/test loaders does not change the
    # averaged error, but is unconventional — confirm it is intended.
    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(source_path, val_file,
                     transforms.Compose([
                         transforms.Resize((224, 224)),
                         transforms.ToTensor(),
                         image_normalize,
                     ])),
        batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)

    criterion = PinBallLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr)

    if test:
        # Evaluate a saved checkpoint on the held-out test split before
        # (re)starting training.
        test_loader = torch.utils.data.DataLoader(
            ImagerLoader(source_path, test_file,
                         transforms.Compose([
                             transforms.Resize((224, 224)),
                             transforms.ToTensor(),
                             image_normalize,
                         ])),
            batch_size=batch_size, shuffle=True,
            num_workers=workers, pin_memory=True)
        checkpoint = torch.load(checkpoint_test)
        model.load_state_dict(checkpoint['state_dict'])
        angular_error = validate(test_loader, model, criterion)
        print('Angular Error is', angular_error)

    for epoch in range(0, epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        angular_error = validate(val_loader, model, criterion)
        # remember best angular error in validation and save checkpoint
        is_best = angular_error < best_error
        best_error = min(angular_error, best_error)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_error,
            }, is_best)
label_split = np.array_split(data_arr[:, 1], num_batches) train_loader = [(img_split[i], label_split[i]) for i in range(num_batches)] val_loader = train_loader # you want to overfit anyway for testing. test_loader = val_loader else: from data_loader import ImagerLoader train_loader = torch.utils.data.DataLoader( ImagerLoader( f"{data_dir}/images/", transforms.Compose([ transforms.Scale( 256 ), # rescale the image keeping the original aspect ratio transforms.CenterCrop( 256), # we get only the center of that rescaled transforms.RandomCrop( 224), # random crop within the center crop transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]), data_path=f"{data_dir}/lmdbs/", partition='train', sem_reg=None), batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True) val_loader = torch.utils.data.DataLoader(ImagerLoader( f"{data_dir}/images/",
def main():
    """Train the im2recipe joint-embedding model with modality switching."""
    model = im2recipe(inst_emb=opts.inst_emb)
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # Loss: cosine similarity between embeddings (input1, input2, target).
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).to(device)
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    # Split parameters into the vision branch and everything else so each
    # group can get its own learning rate.
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params,
                         model.parameters())

    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam(
        [{'params': base_params},
         {'params': model.visionMLP.parameters(),
          'lr': opts.lr * opts.freeVision}],
        lr=opts.lr * opts.freeRecipe)

    # Optionally resume model + optimizer state from a checkpoint.
    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('inf')
    else:
        best_val = float('inf')

    # models are save only when their loss obtains the best value in the validation
    valtrack = 0

    print('There are %d parameter groups' % len(optimizer.param_groups))
    print('Initial base params lr: %f' % optimizer.param_groups[0]['lr'])
    print('Initial vision params lr: %f' % optimizer.param_groups[1]['lr'])

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    cudnn.benchmark = True

    # Training: augmented crops; validation: deterministic center crop.
    train_loader = torch.utils.data.DataLoader(
        ImagerLoader(opts.img_path,
                     transforms.Compose([
                         transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                         transforms.CenterCrop(256),  # we get only the center of that rescaled
                         transforms.RandomCrop(224),  # random crop within the center crop
                         transforms.RandomHorizontalFlip(),
                         transforms.ToTensor(),
                         normalize,
                     ]),
                     data_path=data_path,
                     sem_reg=opts.semantic_reg,
                     partition='train',
                     n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=True,
        num_workers=opts.workers,
        pin_memory=True)
    print('Training loader prepared.')

    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(opts.img_path,
                     transforms.Compose([
                         transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                         transforms.CenterCrop(224),  # we get only the center of that rescaled
                         transforms.ToTensor(),
                         normalize,
                     ]),
                     data_path=data_path,
                     sem_reg=opts.semantic_reg,
                     partition='val',
                     n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=True)
    print('Validation loader prepared.')

    # run epochs
    for epoch in range(opts.start_epoch, opts.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % opts.valfreq == 0 and epoch != 0:
            val_loss = validate(val_loader, model, criterion, epoch)

            # check patience
            if val_loss >= best_val:
                valtrack += 1
            else:
                valtrack = 0
            if valtrack >= opts.patience:
                # we switch modalities
                opts.freeVision = opts.freeRecipe
                opts.freeRecipe = not (opts.freeVision)
                # change the learning rate accordingly
                adjust_learning_rate(optimizer, epoch, opts)
                valtrack = 0

            # save the best model
            is_best = val_loss < best_val
            best_val = min(val_loss, best_val)
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_val': best_val,
                'optimizer': optimizer.state_dict(),
                'valtrack': valtrack,
                'freeVision': opts.freeVision,
                'curr_val': val_loss,
            }, is_best)
            print('** Validation: %f (best) - %d (valtrack)' % (best_val, valtrack))
def main():
    """Train the VideoGaze model, validating and checkpointing each epoch."""
    global args, best_prec1, weight_decay, momentum

    model = VideoGaze(batch_size)
    model.cuda()
    # optionally resume from a checkpoint
    cudnn.benchmark = True

    # Train and validation loaders share identical construction except for
    # the list file, so build them through one local helper.
    def build_loader(list_file):
        return torch.utils.data.DataLoader(
            ImagerLoader(source_path, face_path, target_path, list_file,
                         transforms.Compose([
                             transforms.ToTensor(),
                         ]),
                         square=(227, 227), side=side_w),
            batch_size=batch_size, shuffle=True,
            num_workers=workers, pin_memory=True)

    train_loader = build_loader(train_file)
    val_loader = build_loader(test_file)

    # Define loss and optimizer.
    criterion = ExponentialShiftedGrids().cuda()
    criterion_b = nn.BCELoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    # Training loop.
    for epoch in range(0, epochs):
        adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, criterion_b, optimizer, epoch)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, criterion_b)
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
def main():
    """Train im2recipe with an AdamW optimizer and recall-based validation.

    Bug fix: ``base_params`` was a one-shot ``filter`` iterator. It was
    consumed by the first ``Adam`` constructor, so the ``AdamW`` created
    afterwards received an EMPTY first parameter group — the non-vision
    weights were never optimized. Materializing the parameters into a list
    makes both optimizer constructions see the same parameters.
    """
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    criterion = cosine_crit

    # Creating different parameter groups: vision branch vs everything else.
    # List (not a lazy filter) so it can be iterated more than once.
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = [p for p in model.parameters()
                   if id(p) not in vision_params]

    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam([{
        'params': base_params
    }, {
        'params': model.visionMLP.parameters(),
        'lr': opts.lr * opts.freeVision
    }], lr=opts.lr * opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('-inf')
    else:
        best_val = float('-inf')

    # NOTE(review): this AdamW replaces the Adam above, so any optimizer
    # state restored on resume is discarded — confirm this is intended.
    optimizer = torch.optim.AdamW([{
        'params': base_params
    }, {
        'params': model.visionMLP.parameters()
    }])
    optimizer.param_groups[0]['lr'] = opts.lr
    optimizer.param_groups[1]['lr'] = 0  # vision branch frozen initially

    # models are save only when their loss obtains the best value in the validation
    valtrack = 0

    print('There are %d parameter groups' % len(optimizer.param_groups))
    print('Initial base params lr: {}'.format(optimizer.param_groups[0]['lr']))
    print('Initial vision params lr: {}'.format(
        optimizer.param_groups[1]['lr']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    cudnn.benchmark = True

    train_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(256),  # we get only the center of that rescaled
                transforms.RandomCrop(224),  # random crop within the center crop
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='train',
            sem_reg=opts.semantic_reg),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=opts.batch_size,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)  # 238459 recipes
    print('Training loader prepared, {} recipes'.format(
        len(train_loader.dataset)))

    # preparing validation loader
    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='val'),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=100,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)  # 51129 recipes
    print('Validation loader prepared, {} recipes'.format(
        len(val_loader.dataset)))

    # run epochs
    torch.cuda.empty_cache()
    for epoch in range(opts.start_epoch, opts.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % opts.valfreq == 0 and epoch != 0:
            # sum_score = r1i + r5i + r10i + r1t + r5t + r10t
            sum_score = 0
            for i in range(3):
                sum_score += validate(val_loader, model, criterion)
            sum_score /= 3
            print('Average_score: {}\n'.format(sum_score))
            write_log('Average_score: {}\n'.format(sum_score))

            # Patience counter: no improvement -> eventually switch modality
            # and decay the learning rate.
            if sum_score < best_val:
                valtrack += 1
                if valtrack >= opts.patience:
                    # we switch modalities
                    opts.freeVision = opts.freeRecipe
                    # change the learning rate accordingly
                    adjust_learning_rate(optimizer, epoch, opts)
                    valtrack = 0

            # save the best model
            last_name = opts.snapshots + '{}_lr{}_margin{}_e{}_v-{:.3f}.pth.tar'.format(
                opts.model, opts.lr, opts.margin, epoch, best_val)
            is_best = sum_score > best_val
            best_val = max(sum_score, best_val)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_val': best_val,
                    'optimizer': optimizer.state_dict(),
                    'valtrack': valtrack,
                    'freeVision': opts.freeVision,
                    'curr_val': sum_score,
                    'lr': opts.lr,
                    'margin': opts.margin,
                    'last_name': last_name
                }, is_best)
            print('** Validation: Epoch: {}, Sum_scores: {}, Best: {}'.format(
                epoch, sum_score, best_val))
def main():
    """Repeatedly validate an im2recipe checkpoint and report mean recalls.

    Fix: ``transforms.Scale`` was deprecated and then removed from
    torchvision; ``transforms.Resize`` is the drop-in replacement.
    """
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    criterion = cosine_crit

    # creating different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params,
                         model.parameters())

    # optimizer - with lr initialized accordingly (only used to restore
    # checkpoint optimizer state below; no training happens here)
    optimizer = torch.optim.Adam([{
        'params': base_params
    }, {
        'params': model.visionMLP.parameters(),
        'lr': opts.lr * opts.freeVision
    }], lr=opts.lr * opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('-inf')
    else:
        best_val = float('-inf')

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                # rescale the image keeping the original aspect ratio
                transforms.Resize(256),
                # we get only the center of that rescaled
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='val'),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=100,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)  # 51129 recipes
    print('Validation loader prepared, {} recipes'.format(
        len(val_loader.dataset)))

    # Average 10 validation runs; slots 0-4 image->text recalls, 5-9
    # text->image recalls, slot 10 the summed score.
    res = np.zeros(11)
    for i in range(10):
        res += validate(val_loader, model, criterion)
    res /= 10

    i2t_info = "Average Image to text: {:.1f}, {:.1f}, {:.1f}, {:.1f}, {:.1f}".format(
        res[0], res[1], res[2], res[3], res[4])
    print(i2t_info)
    t2i_info = "Average Text to image: {:.1f}, {:.1f}, {:.1f}, {:.1f}, {:.1f}".format(
        res[5], res[6], res[7], res[8], res[9],
    )
    print(t2i_info)
    print('Average Sum Score: ', res[10])
batch_size = 16 workers = 8 total_ingredients = 30167 # Found by loading vocab.bin normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_loader = torch.utils.data.DataLoader( ImagerLoader( "./data/images/", transforms.Compose([ transforms.Scale( 256), # rescale the image keeping the original aspect ratio transforms.CenterCrop( 256), # we get only the center of that rescaled transforms.RandomCrop(224), # random crop within the center crop transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize, ]), data_path="./data/lmdbs/", partition='train', sem_reg=None), batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True) val_loader = torch.utils.data.DataLoader( ImagerLoader( "./data/images/",