Ejemplo n.º 1
0
        utils.update_values(options_yaml, vars(opt))
    print(opt)
    cudnn.benchmark = True

    if opt.dataset == 'flickr30k':
        from misc.dataloader_flickr30k import DataLoader
    else:
        from misc.dataloader_coco import DataLoader

    if not os.path.exists(opt.checkpoint_path):
        os.makedirs(opt.checkpoint_path)

    ####################################################################################
    # Data Loader
    ####################################################################################
    dataset = DataLoader(opt, split='train')
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers)

    dataset_val = DataLoader(opt, split=opt.val_split)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)

    input_imgs = torch.FloatTensor(1)
    input_seqs = torch.LongTensor(1)
    input_ppls = torch.FloatTensor(1)
    gt_bboxs = torch.FloatTensor(1)
Ejemplo n.º 2
0
        print("please specify the model path...")
        pdb.set_trace()

    cudnn.benchmark = True

    if opt.dataset == 'flickr30k':
        from misc.dataloader_flickr30k import DataLoader
    else:
        from misc.dataloader_coco import DataLoader


    ####################################################################################
    # Data Loader
    ####################################################################################
    opt.data_path = 'data'
    dataset_val = DataLoader(opt, split='test')
    dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=1,
                                            shuffle=False, num_workers=0)

    input_imgs = torch.FloatTensor(1)
    input_seqs = torch.LongTensor(1)
    input_ppls = torch.FloatTensor(1)
    gt_bboxs = torch.FloatTensor(1)
    mask_bboxs = torch.ByteTensor(1)
    gt_seqs = torch.LongTensor(1)
    input_num = torch.LongTensor(1)

    if opt.cuda:
        input_imgs = input_imgs.cuda()
        input_seqs = input_seqs.cuda()
        gt_seqs = gt_seqs.cuda()
Ejemplo n.º 3
0
def eval_relationNBT(opt):
    """Evaluate a fusion of relation-aware NBT captioning models.

    Loads the validation split, copies the dataset-derived vocabulary and
    embedding tables onto ``opt``, builds whichever of the implicit /
    spatial / semantic relation models are requested, runs
    ``eval_fusion_models`` and prints the resulting language metrics.

    Args:
        opt: parsed command-line options namespace. Mutated in place with
            dataset-derived fields (vocab_size, glove tables, ...).
    """
    cudnn.benchmark = True

    if opt.imp_pro == 0.0 and opt.spa_pro == 0.0 and opt.sem_pro == 0.0:
        # No relation module in this pre-trained model: fall back to the
        # plain NBT evaluation.
        # Bug fix: eval_nbt requires `opt`; calling it with no argument
        # raised TypeError.
        eval_nbt(opt)
        return

    if opt.dataset == 'flickr30k':
        from misc.dataloader_flickr30k import DataLoader
    else:
        from misc.dataloader_coco import DataLoader

    ####################################################################################
    # Data Loader
    ####################################################################################
    # Only the dataset object is needed: eval_fusion_models consumes it
    # directly, so no torch DataLoader wrapper (or input-tensor
    # boilerplate) is created here.
    dataset_val = DataLoader(opt, split=opt.val_split)

    ####################################################################################
    # Build the Model
    ####################################################################################
    # Propagate dataset-derived sizes and embedding tables onto `opt`
    # so build_model can size its layers accordingly.
    opt.vocab_size = dataset_val.vocab_size
    opt.detect_size = dataset_val.detect_size
    opt.fg_size = dataset_val.fg_size
    opt.fg_mask = torch.from_numpy(dataset_val.fg_mask).byte()
    opt.glove_fg = torch.from_numpy(dataset_val.glove_fg).float()
    opt.glove_clss = torch.from_numpy(dataset_val.glove_clss).float()
    opt.glove_w = torch.from_numpy(dataset_val.glove_w).float()
    opt.st2towidx = torch.from_numpy(dataset_val.st2towidx).long()

    # Index-to-word / detection / lemma / class lookup tables.
    opt.itow = dataset_val.itow
    opt.itod = dataset_val.itod
    opt.ltow = dataset_val.ltow
    opt.itoc = dataset_val.itoc

    # Build each requested relation model in eval mode; None when unused.
    if opt.imp_model:
        opt.relation_type = 'implicit'
        imp_model = build_model(opt, opt.imp_start_from)
        imp_model.eval()
    else:
        imp_model = None

    if opt.spa_model:
        opt.relation_type = 'spatial'
        spa_model = build_model(opt, opt.spa_start_from)
        spa_model.eval()
    else:
        spa_model = None

    if opt.sem_model:
        opt.relation_type = 'semantic'
        sem_model = build_model(opt, opt.sem_start_from)
        sem_model.eval()
    else:
        sem_model = None

    ####################################################################################
    # Evaluate the model
    ####################################################################################
    lang_stats, predictions = eval_fusion_models(opt, dataset_val, opt.imp_pro,
                                                 opt.spa_pro, opt.sem_pro,
                                                 imp_model, spa_model,
                                                 sem_model)

    print('print the evaluation:')
    for k, v in lang_stats.items():
        print('{}:{}'.format(k, v))
Ejemplo n.º 4
0
def eval_nbt(opt):
    """Evaluate a single (relation-free) NBT captioning model.

    Loads the validation split, copies dataset-derived vocabulary and
    embedding tables onto ``opt``, constructs the attention model, restores
    saved weights when ``opt.start_from`` is set, then runs ``eval_NBT``
    and prints the resulting language metrics.

    Args:
        opt: parsed command-line options namespace. Mutated in place with
            dataset-derived fields (vocab_size, glove tables, ...).
    """
    cudnn.benchmark = True
    if opt.dataset == 'flickr30k':
        from misc.dataloader_flickr30k import DataLoader
    else:
        from misc.dataloader_coco import DataLoader

    ####################################################################################
    # Data Loader
    ####################################################################################
    # eval_NBT consumes the dataset object directly, so no torch
    # DataLoader wrapper (or input-tensor boilerplate) is created here.
    dataset_val = DataLoader(opt, split=opt.val_split)

    ####################################################################################
    # Build the Model
    ####################################################################################
    # Propagate dataset-derived sizes and embedding tables onto `opt`
    # so the model constructor can size its layers accordingly.
    opt.vocab_size = dataset_val.vocab_size
    opt.detect_size = dataset_val.detect_size
    opt.fg_size = dataset_val.fg_size
    opt.fg_mask = torch.from_numpy(dataset_val.fg_mask).byte()
    opt.glove_fg = torch.from_numpy(dataset_val.glove_fg).float()
    opt.glove_clss = torch.from_numpy(dataset_val.glove_clss).float()
    opt.glove_w = torch.from_numpy(dataset_val.glove_w).float()
    opt.st2towidx = torch.from_numpy(dataset_val.st2towidx).long()

    # Index-to-word / detection / lemma / class lookup tables.
    opt.itow = dataset_val.itow
    opt.itod = dataset_val.itod
    opt.ltow = dataset_val.ltow
    opt.itoc = dataset_val.itoc

    # choose the attention model
    if opt.att_model == 'topdown':
        model = AttModel.TopDownModel(opt)
    else:
        model = AttModel.Att2in2Model(opt)

    if opt.start_from is not None:
        # `load_best_score` selects the best-validation checkpoint over
        # the most recent one.
        if opt.load_best_score == 1:
            model_path = os.path.join(opt.start_from, 'model-best.pth')
            info_path = os.path.join(opt.start_from,
                                     'infos_' + opt.id + '-best.pkl')
        else:
            model_path = os.path.join(opt.start_from, 'model.pth')
            info_path = os.path.join(opt.start_from,
                                     'infos_' + opt.id + '.pkl')

        # opt.learning_rate = saved_model_opt.learning_rate
        print('Loading the model weights, path is %s...' % (model_path))
        model.load_state_dict(torch.load(model_path))

    if opt.mGPUs:
        model = nn.DataParallel(model)

    if opt.cuda:
        model.cuda()

    ####################################################################################
    # Evaluate the model
    ####################################################################################
    lang_stats, predictions = eval_NBT(opt,
                                       model,
                                       dataset_val,
                                       processing='eval')

    print('print the evaluation:')
    for k, v in lang_stats.items():
        print('{}:{}'.format(k, v))
Ejemplo n.º 5
0
def demo_relationNBT(opt):
    """Run the fused relation-aware NBT models in demo mode and dump predictions.

    Loads the validation split, copies dataset-derived vocabulary and
    embedding tables onto ``opt``, builds the requested implicit / spatial /
    semantic relation models, runs ``demo_fusion_models`` and writes the
    predictions to a JSON file tagged with the active relation types.

    Args:
        opt: parsed command-line options namespace. Mutated in place with
            dataset-derived fields (vocab_size, glove tables, ...).
    """
    cudnn.benchmark = True

    if opt.dataset == 'flickr30k':
        from misc.dataloader_flickr30k import DataLoader
    else:
        from misc.dataloader_coco import DataLoader

    ####################################################################################
    # Data Loader
    ####################################################################################
    # demo_fusion_models consumes the dataset object directly, so no torch
    # DataLoader wrapper (or input-tensor boilerplate) is created here.
    dataset_val = DataLoader(opt, split=opt.val_split)

    ####################################################################################
    # Build the Model
    ####################################################################################
    # Propagate dataset-derived sizes and embedding tables onto `opt`
    # so build_model can size its layers accordingly.
    opt.vocab_size = dataset_val.vocab_size
    opt.detect_size = dataset_val.detect_size
    opt.fg_size = dataset_val.fg_size
    opt.fg_mask = torch.from_numpy(dataset_val.fg_mask).byte()
    opt.glove_fg = torch.from_numpy(dataset_val.glove_fg).float()
    opt.glove_clss = torch.from_numpy(dataset_val.glove_clss).float()
    opt.glove_w = torch.from_numpy(dataset_val.glove_w).float()
    opt.st2towidx = torch.from_numpy(dataset_val.st2towidx).long()

    # Index-to-word / detection / lemma / class lookup tables.
    opt.itow = dataset_val.itow
    opt.itod = dataset_val.itod
    opt.ltow = dataset_val.ltow
    opt.itoc = dataset_val.itoc

    # Build each requested relation model in eval mode; `save_name`
    # accumulates a suffix recording which relation types participate,
    # used to tag the output file.
    save_name = ''
    if opt.imp_model:
        opt.relation_type = 'implicit'
        imp_model = build_model(opt, opt.imp_start_from)
        imp_model.eval()
        save_name += '_imp'
    else:
        imp_model = None

    if opt.spa_model:
        opt.relation_type = 'spatial'
        spa_model = build_model(opt, opt.spa_start_from)
        spa_model.eval()
        save_name += '_spa'
    else:
        spa_model = None

    if opt.sem_model:
        opt.relation_type = 'semantic'
        sem_model = build_model(opt, opt.sem_start_from)
        sem_model.eval()
        save_name += '_sem'
    else:
        sem_model = None

    ####################################################################################
    # Evaluate the model
    ####################################################################################
    predictions = demo_fusion_models(opt, dataset_val, opt.imp_pro,
                                     opt.spa_pro, opt.sem_pro, imp_model,
                                     spa_model, sem_model, save_name)
    print('saving...')
    # NOTE(review): hard-coded absolute output path — consider exposing it
    # via `opt`. The `with` block fixes the original leaked file handle.
    with open(
            '/import/nobackup_mmv_ioannisp/tx301/vg_feature/visu_relation/visu_relation'
            + save_name + '.json', 'w') as f:
        json.dump(predictions, f)