Example #1
def main():

    model = im2recipe(inst_emb=opts.inst_emb)
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).to(device)
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    print("=> loading checkpoint '{}'".format(opts.model_path))
    if device.type == 'cpu':
        checkpoint = torch.load(opts.model_path,
                                encoding='latin1',
                                map_location='cpu')
    else:
        checkpoint = torch.load(opts.model_path, encoding='latin1')
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale, keeping the aspect ratio
                transforms.CenterCrop(224),  # keep only the center of the rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=data_path,
            sem_reg=opts.semantic_reg,
            partition='test',
            n_samples=opts.n_samples),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=True)
    print('Test loader prepared.')

    # run test
    test(test_loader, model, criterion)
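Note: the cosine criterion above compares an image embedding with a recipe embedding against a ±1 target. A minimal standalone sketch of how nn.CosineEmbeddingLoss is invoked (dummy shapes; margin=0.1 as in the example):

import torch
import torch.nn as nn

crit = nn.CosineEmbeddingLoss(margin=0.1)
img_emb = torch.randn(4, 1024)             # dummy image embeddings
rec_emb = torch.randn(4, 1024)             # dummy recipe embeddings
target = torch.tensor([1., -1., 1., -1.])  # 1 = matching pair, -1 = mismatch
loss = crit(img_emb, rec_emb, target)
print(loss.item())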
Example #2

def main():

    im_path = opts.test_image_path
    ext = os.path.basename(im_path).split('.')[-1]
    if ext not in ['jpeg', 'jpg', 'png']:
        raise Exception("Wrong image format.")

    # create model
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # load checkpoint
    print("=> loading checkpoint '{}'".format(opts.model_path))
    if device.type == 'cpu':
        checkpoint = torch.load(opts.model_path,
                                encoding='latin1',
                                map_location='cpu')
    else:
        checkpoint = torch.load(opts.model_path, encoding='latin1')
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.Resize(256),      # rescale, keeping the aspect ratio
        transforms.CenterCrop(224),  # keep only the center of the rescaled image
        transforms.ToTensor(),
        normalize,
    ])

    # load image
    im = Image.open(im_path).convert('RGB')
    im = transform(im)
    im = im.unsqueeze(0)  # add a batch dimension
    # get model output
    model.eval()
    with torch.no_grad():
        output = model.visionMLP(im.to(device))
        output = norm(output)
    output = output.cpu().numpy()
    # save output (swap the file extension for .pkl)
    with open(im_path[:-len(ext)] + 'pkl', 'wb') as f:
        pickle.dump(output, f)
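Note: the pickled embedding can later be matched against a bank of recipe embeddings. A hedged sketch of that retrieval step (recipe_embs.pkl and its layout are assumptions, not part of the example):

import pickle
import numpy as np

with open('image.pkl', 'rb') as f:        # output of the snippet above
    img_emb = pickle.load(f)              # shape (1, emb_dim), L2-normalized

with open('recipe_embs.pkl', 'rb') as f:  # hypothetical recipe embedding matrix
    recipe_embs = pickle.load(f)          # shape (N, emb_dim), L2-normalized

# for normalized vectors, cosine similarity is just a dot product
scores = recipe_embs @ img_emb.T          # (N, 1)
top5 = np.argsort(-scores.ravel())[:5]    # indices of the five closest recipes
print(top5)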
Example #3
def main():

    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0])
    if not opts.no_cuda:
        model.cuda()

    print("=> loading checkpoint '{}'".format(opts.model_path))
    checkpoint = torch.load(opts.model_path)
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale, keeping the aspect ratio
                transforms.CenterCrop(224),  # keep only the center of the rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            sem_reg=opts.semantic_reg,
            partition='temp'),
        batch_size=opts.batch_size,
        shuffle=False,
        num_workers=opts.workers,
        pin_memory=(not opts.no_cuda))
    print('Test loader prepared.')

    # run test
    test(test_loader, model)
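Note: ImagerLoader itself is defined elsewhere in these projects. A rough skeleton of the interface the loaders above depend on (constructor arguments mirror the calls; everything else here is an assumption):

import torch.utils.data as data

class ImagerLoader(data.Dataset):
    """Sketch only: pairs images with recipe tensors for a partition."""

    def __init__(self, img_path, transform, data_path=None,
                 partition='train', sem_reg=False, n_samples=None):
        self.img_path = img_path
        self.transform = transform
        self.partition = partition
        self.ids = []  # the real class reads sample ids from data_path

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, index):
        # the real class loads the image for self.ids[index], applies
        # self.transform, and returns it with the recipe-side tensors
        raise NotImplementedError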
Example #4
def main():

    model = im2recipe()
    # adjust device_ids to the GPUs available, e.g. [0, 1] or [0, 1, 2, 3]
    model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0])
    if not opts.no_cuda:
        model.cuda()

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1)
    if not opts.no_cuda:
        cosine_crit.cuda()
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class)
        if not opts.no_cuda:
            class_crit.cuda()
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    print("=> loading checkpoint '{}'".format(opts.model_path))
    checkpoint = torch.load(opts.model_path)
    opts.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opts.model_path, checkpoint['epoch']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    transform = transforms.Compose([
        transforms.Resize(256),      # rescale, keeping the aspect ratio
        transforms.CenterCrop(224),  # keep only the center of the rescaled image
        transforms.ToTensor(),
        normalize,
    ])

    dataset = dataset_h5(opts.data_path + '/' + opts.emb_h5_input_file + '.h5',
                         transform=transform)
    assert dataset

    print("starting")

    # preparing test loader
    test_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=opts.batch_size,
                                              shuffle=False,
                                              num_workers=opts.workers,
                                              pin_memory=(not opts.no_cuda))
    print('Test loader prepared.')

    # run test
    test(test_loader, model, criterion)
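Note: dataset_h5 is not shown in this example. A minimal sketch of an HDF5-backed dataset with the same call signature (the 'images' key and HWC uint8 layout are assumptions):

import h5py
from PIL import Image
from torch.utils.data import Dataset

class dataset_h5(Dataset):
    """Sketch only: serve images stored as HWC uint8 arrays in an HDF5 file."""

    def __init__(self, h5_path, transform=None):
        self.h5_path = h5_path
        self.transform = transform
        with h5py.File(h5_path, 'r') as f:
            self.length = len(f['images'])

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        # open the file per access so each DataLoader worker has its own handle
        with h5py.File(self.h5_path, 'r') as f:
            img = Image.fromarray(f['images'][index])
        if self.transform is not None:
            img = self.transform(img)
        return img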
Example #5
def main():

    model = im2recipe(inst_emb=opts.inst_emb)
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.numClasses).fill_(1)
        weights_class[0] = 0 # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).to(device)
        # we will use two different criteria
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    # creating different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params   = filter(lambda p: id(p) not in vision_params, model.parameters())
   
    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam([
                {'params': base_params},
                {'params': model.visionMLP.parameters(), 'lr': opts.lr*opts.freeVision }
            ], lr=opts.lr*opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('inf') 
    else:
        best_val = float('inf') 

    # models are saved only when their loss reaches the best value on the validation set
    valtrack = 0

    print('There are %d parameter groups' % len(optimizer.param_groups))
    print('Initial base params lr: %f' % optimizer.param_groups[0]['lr'])
    print('Initial vision params lr: %f' % optimizer.param_groups[1]['lr'])

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    
    cudnn.benchmark = True

    # preparing the training loader
    train_loader = torch.utils.data.DataLoader(
        ImagerLoader(opts.img_path,
            transforms.Compose([
            transforms.Resize(256), # rescale the image keeping the original aspect ratio
            transforms.CenterCrop(256), # we get only the center of that rescaled
            transforms.RandomCrop(224), # random crop within the center crop 
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),data_path=data_path,sem_reg=opts.semantic_reg,partition='train',n_samples=opts.n_samples),
        batch_size=opts.batch_size, shuffle=True,
        num_workers=opts.workers, pin_memory=True)
    print('Training loader prepared.')

    # preparing validation loader 
    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(opts.img_path,
            transforms.Compose([
            transforms.Resize(256), # rescale the image keeping the original aspect ratio
            transforms.CenterCrop(224), # we get only the center of that rescaled
            transforms.ToTensor(),
            normalize,
        ]),data_path=data_path,sem_reg=opts.semantic_reg,partition='val',n_samples=opts.n_samples),
        batch_size=opts.batch_size, shuffle=False,
        num_workers=opts.workers, pin_memory=True)
    print('Validation loader prepared.')

    # run epochs
    for epoch in range(opts.start_epoch, opts.epochs):
        
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch+1) % opts.valfreq == 0 and epoch != 0:
            val_loss = validate(val_loader, model, criterion, epoch)
        
            # check patience
            if val_loss >= best_val:
                valtrack += 1
            else:
                valtrack = 0
            if valtrack >= opts.patience:
                # we switch modalities
                opts.freeVision = opts.freeRecipe
                opts.freeRecipe = not opts.freeVision
                # change the learning rate accordingly
                adjust_learning_rate(optimizer, epoch, opts) 
                valtrack = 0

            # save the best model
            is_best = val_loss < best_val
            best_val = min(val_loss, best_val)
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_val': best_val,
                'optimizer': optimizer.state_dict(),
                'valtrack': valtrack,
                'freeVision': opts.freeVision,
                'curr_val': val_loss,
            }, is_best)

            print('** Validation: %f (best) - %d (valtrack)' % (best_val, valtrack))
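Note: adjust_learning_rate is what makes the modality switch take effect. Given how the two parameter groups are initialized above, a plausible sketch (the real helper may differ):

def adjust_learning_rate(optimizer, epoch, opts):
    # group 0 holds the recipe-side (base) params, group 1 the vision MLP;
    # re-derive each lr from the freeVision/freeRecipe flags, mirroring the
    # optimizer construction above
    optimizer.param_groups[0]['lr'] = opts.lr * opts.freeRecipe
    optimizer.param_groups[1]['lr'] = opts.lr * opts.freeVision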
Example #6
def main():
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    # model = torch.nn.DataParallel(model)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    criterion = cosine_crit

    # creating different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params,
                         model.parameters())

    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam([{
        'params': base_params
    }, {
        'params': model.visionMLP.parameters(),
        'lr': opts.lr * opts.freeVision
    }],
                                 lr=opts.lr * opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('-inf')
    else:
        best_val = float('-inf')

        optimizer = torch.optim.AdamW([{
            'params': base_params
        }, {
            'params': model.visionMLP.parameters()
        }])
    optimizer.param_groups[0]['lr'] = opts.lr
    optimizer.param_groups[1]['lr'] = 0

    # models are saved only when their loss reaches the best value on the validation set
    valtrack = 0  # lr decay

    print('There are %d parameter groups' % len(optimizer.param_groups))
    print('Initial base params lr: {}'.format(optimizer.param_groups[0]['lr']))
    print('Initial vision params lr: {}'.format(
        optimizer.param_groups[1]['lr']))

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    cudnn.benchmark = True

    # random_sampler = torch.utils.data.RandomSampler(range(100), replacement=True)
    # batch_sampler = torch.utils.data.BatchSampler(random_sampler, batch_size=opts.batch_size, drop_last=True)
    train_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale, keeping the aspect ratio
                transforms.CenterCrop(256),  # keep only the center of the rescaled image
                transforms.RandomCrop(224),  # random crop within the center crop
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='train',
            sem_reg=opts.semantic_reg),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=opts.batch_size,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)
    # 238459 recipes
    print('Training loader prepared, {} recipes'.format(
        len(train_loader.dataset)))

    # preparing validation loader

    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale, keeping the aspect ratio
                transforms.CenterCrop(224),  # keep only the center of the rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='val'),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=100,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)
    # 51129 recipes
    print('Validation loader prepared, {} recipes'.format(
        len(val_loader.dataset)))
    # run epochs
    torch.cuda.empty_cache()
    for epoch in range(opts.start_epoch, opts.epochs):

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % opts.valfreq == 0 and epoch != 0:
            #  sum_score = r1i + r5i + r10i + r1t + r5t + r10t
            sum_score = 0
            for i in range(3):
                sum_score += validate(val_loader, model, criterion)
            sum_score /= 3
            print('Average_score: {}\n'.format(sum_score))
            write_log('Average_score: {}\n'.format(sum_score))

            if sum_score < best_val:
                valtrack += 1
            if valtrack >= opts.patience:
                # we switch modalities
                opts.freeVision = opts.freeRecipe
                # opts.freeRecipe = not (opts.freeVision)
                # change the learning rate accordingly
                adjust_learning_rate(optimizer, epoch, opts)
                valtrack = 0

            # save the best model
            last_name = opts.snapshots + '{}_lr{}_margin{}_e{}_v-{:.3f}.pth.tar'.format(
                opts.model, opts.lr, opts.margin, epoch, best_val)
            is_best = sum_score > best_val
            best_val = max(sum_score, best_val)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_val': best_val,
                    'optimizer': optimizer.state_dict(),
                    'valtrack': valtrack,
                    'freeVision': opts.freeVision,
                    'curr_val': sum_score,
                    'lr': opts.lr,
                    'margin': opts.margin,
                    'last_name': last_name
                }, is_best)

            print('** Validation: Epoch: {}, Sum_scores: {}, Best: {}'.format(
                epoch, sum_score, best_val))
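Note: save_checkpoint follows the usual PyTorch pattern of writing the full training state and copying the best one aside. A minimal sketch (the file names are assumptions):

import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        # keep a separate copy of the best-scoring model
        shutil.copyfile(filename, 'model_best.pth.tar')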
Example #7

def test_oneimage(image_path):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    im = Image.open(image_path).convert('RGB')
    transform_test_image = transforms.Compose([
        transforms.Resize(256),      # rescale, keeping the aspect ratio
        transforms.CenterCrop(256),  # keep only the center of the rescaled image
        transforms.ToTensor(),
        normalize,
    ])
    im = transform_test_image(im)
    im = torch.unsqueeze(im, 0)  # add a batch dimension


    with open('./data/index_from_id.json', 'r') as f:
        index_from_id = json.load(f)

    with open('./data/classes1M.pkl', 'rb') as f:
        class_dict = pickle.load(f)
        classindex = pickle.load(f)
    checkpoint = torch.load(opts.model_path, encoding='latin1', map_location='cpu')


    model = im2recipe()
    # checkpoint keys: epoch, state_dict, optimizer, best_val,
    # valtrack, freeVision, curr_val
    from collections import OrderedDict

    # the checkpoint was saved with visionMLP wrapped in DataParallel,
    # so strip the '.module' prefix before loading into a plain model
    new_state_dict = OrderedDict()
    for k, v in checkpoint['state_dict'].items():
        name = k.replace(".module", "")
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    model.eval()
    with torch.no_grad():
        output = model.visionMLP(im)
        output = output.view(output.size(0), -1)
        output = model.visual_embedding(output)
        output = norm(output)
        output = model.semantic_branch(output)

    # class probabilities -> top-k predicted class indices
    probs = nn.functional.softmax(output, dim=1)
    maxk = 1
    _, pred = probs.topk(maxk, 1, True, True)

    pred = pred.data.cpu().numpy()

    batch_pre = []
    for ii in range(len(pred)):
        now_pre = []
        for j in pred[ii]:
            now_pre.append(classindex[j])

        batch_pre.append(now_pre)

    for ii in range(len(pred)):
        for j in range(len(batch_pre[ii])):
            pred_name = batch_pre[ii][j]
            bianhao = pred[ii][j]  # numeric class id

            for key in index_from_id:
                if class_dict[key] == bianhao:
                    print(index_from_id[key]["title"])
                    print(index_from_id[key]["ingredients"])
                    print(index_from_id[key]["instructions"])
                    print("************************************")
            print("......................................")
Example #8
def main():
    model = im2recipe()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP)
    # model = torch.nn.DataParallel(model)
    model.to(device)

    # define loss function (criterion) and optimizer
    # cosine similarity between embeddings -> input1, input2, target
    cosine_crit = nn.CosineEmbeddingLoss(0.1).to(device)
    criterion = cosine_crit

    # # creating different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params,
                         model.parameters())

    # optimizer - with lr initialized accordingly
    optimizer = torch.optim.Adam([{
        'params': base_params
    }, {
        'params': model.visionMLP.parameters(),
        'lr': opts.lr * opts.freeVision
    }],
                                 lr=opts.lr * opts.freeRecipe)

    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('-inf')
    else:
        best_val = float('-inf')

    # data preparation, loaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
        ImagerLoader(
            opts.img_path,
            transforms.Compose([
                transforms.Resize(256),      # rescale, keeping the aspect ratio
                transforms.CenterCrop(224),  # keep only the center of the rescaled image
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opts.data_path,
            partition='val'),
        num_workers=opts.workers,
        pin_memory=True,
        batch_size=100,
        shuffle=True,
        drop_last=True,
        collate_fn=collate_fn)
    # 51129 recipes
    print('Validation loader prepared, {} recipes'.format(
        len(val_loader.dataset)))

    res = np.zeros(11)
    for i in range(10):
        res += validate(val_loader, model, criterion)
    res /= 10
    i2t_info = "Average Image to text: {:.1f}, {:.1f}, {:.1f}, {:.1f}, {:.1f}".format(
        res[0], res[1], res[2], res[3], res[4])
    print(i2t_info)
    t2i_info = "Average Text to image: {:.1f}, {:.1f}, {:.1f}, {:.1f}, {:.1f}".format(
        res[5], res[6], res[7], res[8], res[9])
    print(t2i_info)
    print('Average Sum Score: ', res[10])
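Note: collate_fn is passed to the loaders in Examples #6 and #8 but not shown. A common reason for a custom collate in these pipelines is skipping samples the dataset failed to load; a hedged sketch of that pattern:

from torch.utils.data.dataloader import default_collate

def collate_fn(batch):
    # drop failed (None) samples, then defer to the default collation
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch)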