def main():
    model = im2recipe(inst_emb=opts.inst_emb)
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).to(device)
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    print("=> loading checkpoint '{}'".format(opts.model_path))
    if device.type == 'cpu':
        checkpoint = torch.load(opts.model_path, encoding='latin1', map_location='cpu')
    else:
        checkpoint = torch.load(opts.model_path, encoding='latin1')
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=data_path,
            sem_reg=opts.semantic_reg,
            partition='test',
            n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=True)
    print('Test loader prepared.')

    # run test
    test(test_loader, model, criterion)
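# --- Illustration (not part of the original scripts) ---
# How CosineEmbeddingLoss with margin 0.1 scores a batch of embedding pairs:
# target = 1 marks a matching image/recipe pair (loss = 1 - cos(x1, x2)),
# target = -1 a mismatched pair (loss = max(0, cos(x1, x2) - 0.1)).
# The tensors and the 1024-d embedding size below are made up for the demo.
import torch
import torch.nn as nn

def _cosine_loss_demo():
    crit = nn.CosineEmbeddingLoss(0.1)
    img_emb = torch.randn(4, 1024)   # hypothetical image embeddings
    rec_emb = torch.randn(4, 1024)   # hypothetical recipe embeddings
    target = torch.tensor([1., -1., 1., -1.])  # matching / non-matching pairs
    return crit(img_emb, rec_emb, target).item()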
def main():
    im_path = opts.test_image_path
    ext = os.path.basename(im_path).split('.')[-1]
    if ext not in ['jpeg', 'jpg', 'png']:
        raise Exception("Wrong image format.")

    # create model
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # load checkpoint
    print("=> loading checkpoint '{}'".format(opts.model_path))
    if device.type == 'cpu':
        checkpoint = torch.load(opts.model_path, encoding='latin1', map_location='cpu')
    else:
        checkpoint = torch.load(opts.model_path, encoding='latin1')
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.Resize(256),      # rescale the image keeping the original aspect ratio
        transforms.CenterCrop(224),  # we get only the center of that rescaled image
        transforms.ToTensor(),
        normalize
    ])

    # load image
    im = Image.open(im_path).convert('RGB')
    im = transform(im)
    im = im.view((1, ) + im.shape)  # add the batch dimension
    im = im.to(device)

    # get model output
    output = model.visionMLP(im)
    output = norm(output)
    output = output.data.cpu().numpy()

    # save output
    with open(im_path.replace(ext, 'pkl'), 'wb') as f:
        pickle.dump(output, f)
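# The embedding scripts above call a module-level helper `norm` whose
# definition is not shown here. A minimal sketch consistent with its use,
# assuming it L2-normalizes each embedding row:
def norm(input, p=2, dim=1, eps=1e-12):
    # divide each row by its Lp norm, clamped to avoid division by zero
    return input / input.norm(p, dim, keepdim=True).clamp(min=eps)

# Reading the saved embedding back (the file name is illustrative; the
# embedding dimension depends on the model configuration):
#   with open('dish.pkl', 'rb') as f:
#       emb = pickle.load(f)   # numpy array of shape (1, embDim)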
def main():
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0])
    if not opts.no_cuda:
        model.cuda()

    print("=> loading checkpoint '{}'".format(opts.model_path))
    checkpoint = torch.load(opts.model_path)
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            sem_reg=opts.semantic_reg,
            partition='temp'),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=(not opts.no_cuda))
    print('Test loader prepared.')

    # run test
    test(test_loader, model)
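# Shared scaffolding these entry points assume. A sketch under assumptions:
# the repo is presumed to build `opts` via an argparse helper and to expose a
# module-level `device`; the helper name and the exact device logic here are
# illustrative, not the repo's verbatim code.
import torch
from args import get_parser  # assumed repo helper defining all the opts used above

parser = get_parser()
opts = parser.parse_args()
device = torch.device('cpu' if (getattr(opts, 'no_cuda', False) or
                                not torch.cuda.is_available()) else 'cuda')

if __name__ == '__main__':
    main()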
def main():
    model = im2recipe()
    # model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0,1,2,3])
    # barelo:
    model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0])
    # model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0,1])
    if not opts.no_cuda:
        model.cuda()

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1)
    if not opts.no_cuda:
        cosine_crit.cuda()
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class)
        if not opts.no_cuda:
            class_crit.cuda()
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    print("=> loading checkpoint '{}'".format(opts.model_path))
    checkpoint = torch.load(opts.model_path)
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.Resize(256),      # rescale the image keeping the original aspect ratio
        transforms.CenterCrop(224),  # we get only the center of that rescaled image
        transforms.ToTensor(),
        normalize,
    ])

    dataset = dataset_h5(opts.data_path + '/' + opts.emb_h5_input_file + '.h5',
                         transform=transform)
    assert dataset
    print("starting")

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=opts.batch_size,
                                              shuffle=False,
                                              num_workers=opts.workers,
                                              pin_memory=(not opts.no_cuda))
    print('Test loader prepared.')

    # run test
    test(test_loader, model, criterion)
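# `dataset_h5` is defined elsewhere in the repo. A minimal sketch of a
# Dataset reading images from an HDF5 file; the 'images' key and the uint8
# HWC layout are assumptions, not the repo's actual schema:
import h5py
import torch.utils.data as data
from PIL import Image

class dataset_h5(data.Dataset):
    def __init__(self, h5_path, transform=None):
        self.h5_path = h5_path
        self.transform = transform
        with h5py.File(h5_path, 'r') as f:
            self.length = len(f['images'])

    def __getitem__(self, index):
        # open the file per item so the dataset also works with num_workers > 0
        with h5py.File(self.h5_path, 'r') as f:
            im = Image.fromarray(f['images'][index])
        return self.transform(im) if self.transform else im

    def __len__(self):
        return self.length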
def main():
    model = im2recipe(inst_emb=opts.inst_emb)
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).to(device)
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    # creating different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params, model.parameters())

    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam([
        {'params': base_params},
        {'params': model.visionMLP.parameters(), 'lr': opts.lr * opts.freeVision}
    ], lr=opts.lr * opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('inf')
    else:
        best_val = float('inf')

    # models are saved only when their loss obtains the best value on the validation set
    valtrack = 0

    print('There are %d parameter groups' % len(optimizer.param_groups))
    print('Initial base params lr: %f' % optimizer.param_groups[0]['lr'])
    print('Initial vision params lr: %f' % optimizer.param_groups[1]['lr'])

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    cudnn.benchmark = True

    # preparing the training loader
    train_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),       # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(256),   # we get only the center of that rescaled image
                transforms.RandomCrop(224),   # random crop within the center crop
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=data_path,
            sem_reg=opts.semantic_reg,
            partition='train',
            n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=True,
        num_workers=opts.workers,
        pin_memory=True)
    print('Training loader prepared.')

    # preparing validation loader
    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=data_path,
            sem_reg=opts.semantic_reg,
            partition='val',
            n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=True)
    print('Validation loader prepared.')

    # run epochs
    for epoch in range(opts.start_epoch, opts.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % opts.valfreq == 0 and epoch != 0:
            val_loss = validate(val_loader, model, criterion, epoch)

            # check patience
            if val_loss >= best_val:
                valtrack += 1
            else:
                valtrack = 0
            if valtrack >= opts.patience:
                # we switch modalities
                opts.freeVision = opts.freeRecipe
                opts.freeRecipe = not opts.freeVision
                # change the learning rate accordingly
                adjust_learning_rate(optimizer, epoch, opts)
                valtrack = 0

            # save the best model
            is_best = val_loss < best_val
            best_val = min(val_loss, best_val)
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_val': best_val,
                'optimizer': optimizer.state_dict(),
                'valtrack': valtrack,
                'freeVision': opts.freeVision,
                'curr_val': val_loss,
            }, is_best)

            print('** Validation: %f (best) - %d (valtrack)' % (best_val, valtrack))
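# `adjust_learning_rate` implements the modality switch: after `patience`
# stalled validations, training alternates between updating the vision branch
# and the recipe branch. A minimal sketch consistent with the two parameter
# groups created above, not necessarily the repo's exact implementation:
def adjust_learning_rate(optimizer, epoch, opts):
    # group 0: recipe-side ("base") params, group 1: visionMLP params;
    # freeRecipe / freeVision act as 0/1 gates on each group's learning rate
    optimizer.param_groups[0]['lr'] = opts.lr * opts.freeRecipe
    optimizer.param_groups[1]['lr'] = opts.lr * opts.freeVision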
def main():
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    # model = torch.nn.DataParallel(model)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    criterion = cosine_crit

    # creating different parameter groups; materialized as a list because the
    # parameters are consumed by both optimizers constructed below
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = list(filter(lambda p: id(p) not in vision_params, model.parameters()))

    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam([
        {'params': base_params},
        {'params': model.visionMLP.parameters(), 'lr': opts.lr * opts.freeVision}
    ], lr=opts.lr * opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('-inf')
    else:
        best_val = float('-inf')

    # this replaces the Adam optimizer defined above: base params train at
    # opts.lr, the vision branch starts frozen (lr = 0)
    optimizer = torch.optim.AdamW([
        {'params': base_params},
        {'params': model.visionMLP.parameters()}
    ])
    optimizer.param_groups[0]['lr'] = opts.lr
    optimizer.param_groups[1]['lr'] = 0

    # models are saved only when their score obtains the best value on the validation set
    valtrack = 0

    # lr decay
    print('There are %d parameter groups' % len(optimizer.param_groups))
    print('Initial base params lr: {}'.format(optimizer.param_groups[0]['lr']))
    print('Initial vision params lr: {}'.format(optimizer.param_groups[1]['lr']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    cudnn.benchmark = True

    # random_sampler = torch.utils.data.RandomSampler(range(100), replacement=True)
    # batch_sampler = torch.utils.data.BatchSampler(random_sampler, batch_size=opts.batch_size, drop_last=True)

    train_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),       # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(256),   # we get only the center of that rescaled image
                transforms.RandomCrop(224),   # random crop within the center crop
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='train',
            sem_reg=opts.semantic_reg),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=opts.batch_size,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)  # 238459 recipes
    print('Training loader prepared, {} recipes'.format(len(train_loader.dataset)))

    # preparing validation loader
    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='val'),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=100,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)  # 51129 recipes
    print('Validation loader prepared, {} recipes'.format(len(val_loader.dataset)))

    # run epochs
    torch.cuda.empty_cache()
    for epoch in range(opts.start_epoch, opts.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % opts.valfreq == 0 and epoch != 0:
            # sum_score = r1i + r5i + r10i + r1t + r5t + r10t
            sum_score = 0
            for i in range(3):
                sum_score += validate(val_loader, model, criterion)
            sum_score /= 3
            print('Average_score: {}\n'.format(sum_score))
            write_log('Average_score: {}\n'.format(sum_score))

            if sum_score < best_val:
                valtrack += 1
            if valtrack >= opts.patience:
                # we switch modalities
                opts.freeVision = opts.freeRecipe
                # opts.freeRecipe = not (opts.freeVision)
                # change the learning rate accordingly
                adjust_learning_rate(optimizer, epoch, opts)
                valtrack = 0

            # save the best model
            last_name = opts.snapshots + '{}_lr{}_margin{}_e{}_v-{:.3f}.pth.tar'.format(
                opts.model, opts.lr, opts.margin, epoch, best_val)
            is_best = sum_score > best_val
            best_val = max(sum_score, best_val)
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_val': best_val,
                'optimizer': optimizer.state_dict(),
                'valtrack': valtrack,
                'freeVision': opts.freeVision,
                'curr_val': sum_score,
                'lr': opts.lr,
                'margin': opts.margin,
                'last_name': last_name
            }, is_best)

            print('** Validation: Epoch: {}, Sum_scores: {}, Best: {}'.format(
                epoch, sum_score, best_val))
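# `save_checkpoint` is provided elsewhere in the repo; a minimal sketch that
# matches how it is called above (a state dict plus an `is_best` flag). The
# filename pattern and the keep-only-the-best policy are assumptions:
def save_checkpoint(state, is_best, filename=None):
    if filename is None:
        filename = opts.snapshots + 'model_e{:03d}_v-{:.3f}.pth.tar'.format(
            state['epoch'], state['best_val'])
    if is_best:  # assumed policy: only the best-scoring model is written to disk
        torch.save(state, filename)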
def test_oneimage(image_path):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    im = Image.open(image_path).convert('RGB')
    transform_test_image = transforms.Compose([
        transforms.Resize(256),      # rescale the image keeping the original aspect ratio
        transforms.CenterCrop(256),  # we get only the center of that rescaled image
        transforms.ToTensor(),
        normalize,
    ])
    im = transform_test_image(im)
    im = torch.unsqueeze(im, 0)  # add the batch dimension

    with open('./data/index_from_id.json', 'r') as f:
        index_from_id = json.load(f)
    with open('./data/classes1M.pkl', 'rb') as f:
        class_dict = pickle.load(f)
        classindex = pickle.load(f)

    checkpoint = torch.load(opts.model_path, encoding='latin1', map_location='cpu')
    # checkpoint keys: optimizer, state_dict, valtrack, best_val, epoch,
    # freeVision, curr_val
    model = im2recipe()

    # the checkpoint was saved from a model whose visionMLP was wrapped in
    # DataParallel, so the 'module.' prefix must be stripped from its keys
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in checkpoint['state_dict'].items():
        name = k.replace('.module', '')  # remove `module.`
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    model.eval()

    with torch.no_grad():
        output = model.visionMLP(im)
        output = output.view(output.size(0), -1)
        output = model.visual_embedding(output)
        output = norm(output)
        output = model.semantic_branch(output)

    probs = nn.functional.softmax(output, 1)
    maxk = 1  # keep only the top-1 predicted class
    _, pred = probs.topk(maxk, 1, True, True)
    pred = pred.data.cpu().numpy()

    batch_pre = []
    for ii in range(len(pred)):
        now_pre = []
        for j in pred[ii]:
            now_pre.append(classindex[j])
        batch_pre.append(now_pre)

    for ii in range(len(pred)):
        for j in range(len(batch_pre[ii])):
            pred_name = batch_pre[ii][j]  # human-readable class name
            bianhao = pred[ii][j]         # predicted class index
            # print every recipe whose class matches the prediction
            for key in index_from_id:
                if class_dict[key] == bianhao:
                    print(index_from_id[key]["title"])
                    print(index_from_id[key]["ingredients"])
                    print(index_from_id[key]["instructions"])
                    print("************************************")
            print("......................................")
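# Example entry point (an assumption: it reuses the test_image_path option
# that the one-image embedding script above reads; any RGB jpg/jpeg/png works).
# For top-5 instead of top-1 predictions, raise maxk inside test_oneimage.
if __name__ == '__main__':
    test_oneimage(opts.test_image_path)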
def main():
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    # model = torch.nn.DataParallel(model)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    criterion = cosine_crit

    # creating different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params, model.parameters())

    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam([
        {'params': base_params},
        {'params': model.visionMLP.parameters(), 'lr': opts.lr * opts.freeVision}
    ], lr=opts.lr * opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('-inf')
    else:
        best_val = float('-inf')

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(224),  # we get only the center of that rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='val'),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=100,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)  # 51129 recipes
    print('Validation loader prepared, {} recipes'.format(len(val_loader.dataset)))

    # average the retrieval metrics over 10 validation runs
    res = np.zeros(11)
    for i in range(10):
        res += validate(val_loader, model, criterion)
    res /= 10

    i2t_info = "Average Image to text: {:.1f}, {:.1f}, {:.1f}, {:.1f}, {:.1f}".format(
        res[0], res[1], res[2], res[3], res[4])
    print(i2t_info)
    t2i_info = "Average Text to image: {:.1f}, {:.1f}, {:.1f}, {:.1f}, {:.1f}".format(
        res[5], res[6], res[7], res[8], res[9])
    print(t2i_info)
    print('Average Sum Score: ', res[10])
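# `validate` returns an 11-element vector: judging by the prints above, five
# image-to-text numbers, five text-to-image numbers, and a summed score. A
# sketch of the standard way such cross-modal retrieval metrics are computed
# from a similarity matrix (illustrative, not necessarily the repo's validate()):
import numpy as np

def retrieval_metrics(sims):
    # sims[i, j]: similarity of image i to recipe j; ground truth on the diagonal
    order = np.argsort(-sims, axis=1)  # candidates sorted by descending similarity
    ranks = np.array([np.where(order[i] == i)[0][0] for i in range(sims.shape[0])])
    r1, r5, r10 = (100.0 * np.mean(ranks < k) for k in (1, 5, 10))
    medr = np.median(ranks) + 1   # 1-based median rank
    meanr = ranks.mean() + 1      # 1-based mean rank
    return r1, r5, r10, medr, meanr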