Ejemplo n.º 1
0
    parser.add_argument('--seed', type=int, default=1204, help='random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    # Create the output directory and a file-backed logger, then record the
    # full argument namespace so the run can be reproduced from its log.
    utils.create_dir(args.output)
    logger = utils.Logger(os.path.join(args.output, 'log.txt'))
    logger.write(args.__repr__())

    # Let cuDNN benchmark and cache the fastest conv algorithms
    # (beneficial when input shapes are fixed across iterations).
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    # adaptive=True: variable number of feature boxes per image.
    #   NOTE(review): inferred from the flag name — confirm against
    #   VQAFeatureDataset's implementation.
    train_dset = VQAFeatureDataset('train', dictionary, adaptive=True)
    val_dset = VQAFeatureDataset('val', dictionary, adaptive=True)

    batch_size = args.batch_size

    # Resolve the model constructor by name: args.model == 'ban' selects
    # base_model.build_ban(...), etc.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid, args.op,
                                             args.gamma).cuda()

    tfidf = None
    weights = None

    if args.tfidf:
        # NOTE(review): reloads the dictionary already held in `dictionary`
        # above and shadows the builtin `dict` — consider reusing
        # `dictionary` here instead.
        dict = Dictionary.load_from_file('data/dictionary.pkl')
        tfidf, weights = tfidf_from_questions(['train', 'val', 'test2015'],
                                              dict)
Ejemplo n.º 2
0
    return args


if __name__ == '__main__':
    args = parse_args()

    # Seed CPU and all CUDA RNGs for reproducibility; benchmark=True lets
    # cuDNN pick the fastest conv algorithms for fixed input shapes.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.benchmark = True

    # Select datasets, word-embedding file and train() entry point per task.
    if args.task == 'vqa':
        from train import train
        dict_path = 'data/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        # NOTE(review): `adaptive=args.unadaptive` looks inverted — confirm
        # whether the flag actually stores "not adaptive" or is misnamed.
        train_dset = VQAFeatureDataset('train',
                                       dictionary,
                                       adaptive=args.unadaptive,
                                       use_counter=args.use_counter)
        val_dset = VQAFeatureDataset('val',
                                     dictionary,
                                     adaptive=args.unadaptive,
                                     use_counter=args.use_counter)
        w_emb_path = 'data/glove6b_init_300d.npy'

    elif args.task == 'flickr':
        from train_flickr import train
        dict_path = 'data/flickr30k/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        train_dset = Flickr30kFeatureDataset('train', dictionary)
        val_dset = Flickr30kFeatureDataset('val', dictionary)
        w_emb_path = 'data/flickr30k/glove6b_init_300d.npy'
        # The attention op string is cleared for the Flickr task.
        #   NOTE(review): reason not visible in this chunk — confirm.
        args.op = ''
Ejemplo n.º 3
0
def main():
    """Entry point: build CP-VQA train/eval datasets, construct the model
    with the selected debiasing loss function, and launch training.

    Relies on module-level imports visible elsewhere in the file
    (parse_args, utils, click, Dictionary, VQAFeatureDataset, base_model,
    DataLoader, the debias loss classes, get_bias and train).
    """
    args = parse_args()
    dataset = args.dataset
    args.output = os.path.join('logs', args.output)
    if not os.path.isdir(args.output):
        utils.create_dir(args.output)
    else:
        # BUG FIX: `default=False` was previously passed as a kwarg to
        # str.format() (which silently ignores unused kwargs) instead of
        # click.confirm(), so the prompt had no explicit default.
        if click.confirm(
                'Exp directory already exists in {}. Erase?'.format(
                    args.output),
                default=False):
            os.system('rm -r ' + args.output)
            utils.create_dir(args.output)
        else:
            os._exit(1)

    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset == 'cpv2' or dataset == 'v2':
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    else:
        # Fail fast with a clear message instead of a NameError on
        # `dictionary` a few lines below.
        raise ValueError('Unknown dataset: %s' % dataset)

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train',
                                   dictionary,
                                   dataset=dataset,
                                   cache_image_features=args.cache_features)

    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val',
                                  dictionary,
                                  dataset=dataset,
                                  cache_image_features=args.cache_features)

    # Pre-compute the answer-prior bias consumed by the debiasing losses.
    get_bias(train_dset, eval_dset)

    # Build the model using the original constructor.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    if dataset == 'cpv1':
        model.w_emb.init_embedding('data/glove6b_init_300d_v1.npy')
    elif dataset == 'cpv2' or dataset == 'v2':
        model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    # Attach the loss_fn selected by --debias.
    if args.debias == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.debias == "none":
        model.debias_loss_fn = Plain()
    elif args.debias == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.debias == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    elif args.debias == 'focal':
        model.debias_loss_fn = Focal()
    else:
        # BUG FIX: previously raised with the nonexistent `args.mode`;
        # report the value that actually failed to match.
        raise RuntimeError(args.debias)

    # question_id -> question type, used for per-type evaluation metrics.
    with open('util/qid2type_%s.json' % args.dataset, 'r') as f:
        qid2type = json.load(f)
    model = model.cuda()
    batch_size = args.batch_size

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    train_loader = DataLoader(train_dset,
                              batch_size,
                              shuffle=True,
                              num_workers=0)
    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=0)

    print("Starting training...")
    train(model, train_loader, eval_loader, args, qid2type)
Ejemplo n.º 4
0
    results = []
    for i in range(logits.size(0)):
        result = {}
        result['question_id'] = qIds[i].item()
        result['answer'] = get_answer(logits[i], dataloader)
        results.append(result)
    return results


if __name__ == '__main__':
    args = parse_args()

    # cuDNN autotuner: fastest conv algorithms for fixed input shapes.
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    # args.split selects which split to evaluate (e.g. 'val').
    #   NOTE(review): accepted split names not visible here — confirm.
    eval_dset = VQAFeatureDataset(args.split, dictionary, adaptive=True)

    # Scale the per-device batch size by the number of visible GPUs.
    n_device = torch.cuda.device_count()
    batch_size = args.batch_size * n_device

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(eval_dset, args.num_hid, args.op,
                                             args.gamma).cuda()
    # trim_collate presumably pads each batch only to its own longest
    # sequence. NOTE(review): inferred from the name — verify in utils.
    eval_loader = DataLoader(eval_dset,
                             batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=utils.trim_collate)
    def process(args, model, eval_loader):
        model_path = args.input+'/model%s.pth' % \
Ejemplo n.º 5
0
    print(args)

    if args.task == 'train':

        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True

        start = time.time()
        batch_size = args.batch_size
        train_batch = batch_size
        test_batch = batch_size

        dictionary = Dictionary.load_from_file('data/dictionary.pkl')
        if args.train_dataset == 'all':
            train_dset = VQAFeatureDataset('train', dictionary, filter_pair=False)
        elif args.train_dataset == 'filter':
            train_dset = VQAFeatureDataset('train', dictionary, filter_pair=True)
        elif args.train_dataset == 'pairwise':
            train_batch = batch_size / 2
            train_dset = VQAFeatureDatasetWithPair('train', dictionary)
        elif args.train_dataset == 'end2end':
            train_dset = VQAFeatureDatasetEnd2End('train', dictionary, filter_pair=False)
        elif args.train_dataset == 'all_pair':
            train_dset_all = VQAFeatureDataset('train', dictionary, filter_pair=False)
            train_dset_pair = VQAFeatureDatasetWithPair('train', dictionary, preloaded=train_dset_all.pre_loaded())
            train_dset = train_dset_all  # for model building: dset.vdim and co.
        elif args.train_dataset == 'allpair':
            train_batch = batch_size / 2
            train_dset = VQAFeatureDatasetAllPair('train', dictionary)
        elif args.train_dataset == 'trainval':
Ejemplo n.º 6
0
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model', type=str, default='baseline0_newatt')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    # Seed CPU and CUDA RNGs for reproducibility; let cuDNN benchmark
    # conv algorithms since input shapes are fixed.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    # Load the shared word dictionary and the train/val feature datasets.
    word_dict = Dictionary.load_from_file(
        '../../bottom-up-attention-vqa-tf/data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', word_dict)
    eval_dset = VQAFeatureDataset('val', word_dict)

    # Resolve base_model.build_<model>, build it, and seed the word
    # embeddings from pre-trained GloVe vectors.
    model = getattr(base_model, 'build_%s' % args.model)(train_dset,
                                                         args.num_hid).cuda()
    model.w_emb.init_embedding(
        '../../bottom-up-attention-vqa-tf/data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()

    batch_size = args.batch_size
    train_loader = DataLoader(train_dset, batch_size, shuffle=True,
                              num_workers=1)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=True,
                             num_workers=1)
    train(model, train_loader, eval_loader, args.epochs, args.output)
Ejemplo n.º 7
0
def imageAdv(args, imageAdvF):
    """Run an adversarial-image attack over sampled VQA validation questions.

    For every question that has candidate targets in data/q2t.pkl, picks a
    random target answer that is NOT among the ground-truth labels, calls
    the supplied attack function ``imageAdvF(model, vqaInfo, entry, target,
    dataset)``, logs per-question progress, saves the perturbed image, and
    finally dumps success/iteration/target/prediction arrays to an .npz.

    NOTE: Python 2 code (print statements, cPickle).
    """
    # Fetch data.
    questionIds = cPickle.load(open("data/goodQuestions.pkl", "rb"))
    # question_id -> list of candidate target answer labels.
    targetDict = cPickle.load(open("data/q2t.pkl", "rb"))
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching eval data"
    imageLoader = imageModel.ImageLoader("data/val2014img", "val")
    dataset = VQAFeatureDataset('valSample',
                                args.evalset_name,
                                dictionary,
                                imageLoader=imageLoader,
                                questionIds=questionIds)

    # Fetch model.
    model = imageModel.getCombinedModel(args, dataset)
    model = nn.DataParallel(model).cuda()

    # Train and save.
    label2ans = dataset.label2ans
    # imageSaverOld = imageModel.ImageSaver("data/adv1Old")
    imageSaverNew = imageModel.ImageSaver("data/adv6New", True)
    numSuccess = 0
    successList = []
    iterList = []
    targetList = []
    predictedList = []
    for i, vqaInfo in enumerate(dataset):
        entry = dataset.entries[i]
        if entry["question_id"] not in targetDict:
            continue
        targets = targetDict[entry["question_id"]]
        target = None
        # Resample until the target differs from every ground-truth label,
        # so the attack always aims at a wrong answer.
        while target is None or target in entry["answer"]["labels"]:
            target = targets[np.random.randint(len(targets))]
        print ""
        print "questionId: {}, imgId: {}".format(entry["question_id"],
                                                 entry["image_id"])
        print "Answers: {}".format(
            [label2ans[label] for label in entry["answer"]["labels"]])
        print "Target: {}".format(label2ans[target])

        # Run the attack and time it.
        startTime = time.time()
        success, imgOld, imgNew, predicted, iters = imageAdvF(
            model, vqaInfo, entry, target, dataset)
        print "Num: {0}, Success: {1}, Predicted: {2}, Iters: {3}, Taken: {4:.2f}".format(
            i, success, label2ans[predicted], iters,
            time.time() - startTime)

        if success:
            numSuccess += 1
            successList.append(True)
        else:
            successList.append(False)
        iterList.append(iters)
        targetList.append(target)
        # NOTE(review): assumes `predicted` is a 0-dim tensor so that
        # int(predicted.numpy()) yields a scalar — confirm with imageAdvF.
        predictedList.append(int(predicted.numpy()))

        qidStr = str(entry["question_id"])
        # imageSaverOld.saveImage(imgOld, prefix + imgIdStr + ".jpg")
        imageSaverNew.saveImage(imgNew, qidStr + ".png")

    # Summary of the whole run.
    print ""
    print "Success: {}".format(numSuccess)
    print "SuccessList: {}".format(successList)
    print "IterList: {}".format(iterList)
    print "TargetList: {}".format(targetList)
    print "PredictedList: {}".format(predictedList)

    np.savez("data/adv6Out.npz",
             successList=successList,
             iterList=iterList,
             targetList=targetList,
             predictedList=predictedList)
Ejemplo n.º 8
0
        score += batch_score
        upper_bound += (a.max(1)[0]).sum()
        num_data += pred.size(0)

    score = score / len(dataloader.dataset)
    V_loss /= len(dataloader.dataset)
    upper_bound = upper_bound / len(dataloader.dataset)

    return score, upper_bound, V_loss


args = parse_args()
dictionary = Dictionary.load_from_file('./data/dictionary.pkl')

# test_dset = VQAFeatureDataset('test', dictionary)
eval_dset = VQAFeatureDataset('val', dictionary)
# Build the A3x2 model with dropout/normalisation settings from the CLI.
#   NOTE(review): "A3x2" semantics are not visible in this chunk.
model = build_model_A3x2(eval_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
                               activation=args.activation, dropL=args.dropout_L, dropG=args.dropout_G,\
                               dropW=args.dropout_W, dropC=args.dropout_C)
model = model.cuda()
model.w_emb.init_embedding('data/glove6b_init_300d.npy')
model = nn.DataParallel(model).cuda()

# model.apply(weights_init_ku)

# Restore trained weights. NOTE(review): MODEL_PATH and BATCH_SIZE are
# expected to be defined earlier in the file — not visible here, confirm.
ckpt = torch.load(MODEL_PATH)
model.load_state_dict(ckpt)

# NOTE(review): shuffle=True is unusual for an eval loader — confirm intent.
eval_loader = DataLoader(eval_dset, BATCH_SIZE, shuffle=True, num_workers=4)
# test_loader  = DataLoader(test_dset, BATCH_SIZE, shuffle=True, num_workers=4)
Ejemplo n.º 9
0
    parser.add_argument('--lambda', type=float, default=0.01)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    # Plain-dict view of the argument namespace, forwarded to train().
    params = vars(args)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    # dictionary = Dictionary.load_from_file('./dictionary.pkl')
    dictionary = load_dictionary(args.sentense_file_path, args.task)
    if not args.test_phase:
        train_dset = VQAFeatureDataset(args.task, dictionary, args.sentense_file_path,args.feat_category,args.feat_path, mode='Train')
        eval_dset = VQAFeatureDataset(args.task, dictionary, args.sentense_file_path,args.feat_category,args.feat_path, mode='Test')
        batch_size = args.batch_size

        # Look up the task-specific module '<task>_model' and its
        # build_<model> constructor.
        #   NOTE(review): locals()[model_name] only resolves if that module
        #   was imported into this scope — confirm against the file's
        #   imports.
        model_name = args.task+'_model'
        model = getattr(locals()[model_name], 'build_%s' % args.model)(args.task, train_dset, params).cuda()
        # model.w_emb.init_embedding(dictionary,args.glove_file_path,args.task)

        print('========start train========')
        model = model.cuda()

        train_loader = DataLoader(train_dset, batch_size, shuffle=True, num_workers=1)
        eval_loader = DataLoader(eval_dset, batch_size, shuffle=True, num_workers=1)
        train(model, train_loader, eval_loader, params)
    else:
        test_dset = VQAFeatureDataset(args.task, dictionary, args.sentense_file_path,args.feat_category,args.feat_path, mode='Valid')
Ejemplo n.º 10
0
                                     osf_object,
                                     args.relation_type,
                                     adaptive=args.adaptive,
                                     pos_emb_dim=args.imp_pos_emb_dim,
                                     dataroot=args.data_folder)
        train_dset = GQAFeatureDataset('train_balanced',
                                       dictionary,
                                       osf_object,
                                       args.relation_type,
                                       adaptive=args.adaptive,
                                       pos_emb_dim=args.imp_pos_emb_dim,
                                       dataroot=args.data_folder)
    else:
        val_dset = VQAFeatureDataset('val',
                                     dictionary,
                                     args.relation_type,
                                     adaptive=args.adaptive,
                                     pos_emb_dim=args.imp_pos_emb_dim,
                                     dataroot=args.data_folder)
        train_dset = VQAFeatureDataset('train',
                                       dictionary,
                                       args.relation_type,
                                       adaptive=args.adaptive,
                                       pos_emb_dim=args.imp_pos_emb_dim,
                                       dataroot=args.data_folder)

    model = build_regat(val_dset, args).to(device)

    tfidf = None
    weights = None
    if args.dataset == "gqa" and args.tfidf:
        tfidf, weights = tfidf_from_questions_gqa(
Ejemplo n.º 11
0
if __name__ == '__main__':
    args = parse_args()

    # Seed every RNG (python, numpy, torch CPU and CUDA) for reproducibility.
    for seed_fn in (random.seed, np.random.seed,
                    torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(args.seed)
    # torch.backends.cudnn.benchmark = True

    # Prefer the requested GPU when CUDA is available, else fall back to CPU.
    if torch.cuda.is_available():
        device = torch.device('cuda:' + args.gpu)
    else:
        device = torch.device('cpu')

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    # One feature dataset per split, each bound to the target device.
    splits = {name: VQAFeatureDataset(name, dictionary, device)
              for name in ('train', 'finetune', 'dev', 'test')}
    train_dset = splits['train']
    finetune_dset = splits['finetune']
    dev_dset = splits['dev']
    eval_dset = splits['test']
    batch_size = args.batch_size

    # Resolve base_model.build_<model> and seed the word embeddings with
    # pre-trained GloVe vectors.
    model = getattr(base_model, 'build_%s' % args.model)(train_dset,
                                                         args.num_hid)
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    model = model.to(device)

    # Optionally warm-start from a saved checkpoint.
    if args.model_ckpt:
        model.load_state_dict(torch.load(args.model_ckpt))
        print('Loaded checkpoint')
def main(args):
    """Evaluate a composed medical-VQA pipeline on one dataset split.

    Three pretrained networks cooperate: a BERT-based question-type
    classifier (ques_net), a disease classifier over images (img_net),
    and a binary normal/abnormal ResNet-34 (cls_net).  Per-image answer
    and question-type predictions are written under args.out_dir; for
    non-test splits, accuracy scores are written as well.

    NOTE(review): this chunk ends at the last visible line — the function
    may continue past what is shown here.
    """
    softmax = torch.nn.Softmax(dim=-1)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    dictionary = Dictionary.load_from_file(args.dictionary_path)

    ques_net = Question_Classifier(args.bert_mode,
                                   args.bert_pretrain,
                                   num_classes=args.ques_num_classes)
    img_net = Network(backbone_type=args.backbone_type,
                      num_classes=args.img_num_classes)
    cls_net = models.resnet34(pretrained=False, num_classes=2)
    cls_net = cls_net.cuda()
    ques_net = ques_net.cuda()
    img_net = img_net.cuda()

    # Restore all three networks from their checkpoints.
    cls_net.load_state_dict(torch.load(args.cls2_model_path))
    ques_net.load_state_dict(
        torch.load(args.ques_model_path,
                   map_location=lambda storage, loc: storage))
    img_net.load_model(args.img_model_path)
    # Feature dictionary: class index (as str) -> list of feature vectors,
    # used below for cosine-similarity re-ranking of low-confidence
    # predictions.  NOTE(review): schema inferred from usage — confirm.
    fd = json.load(open(args.feature_dict_path, 'r'))

    eval_dset = VQAFeatureDataset(args.split,
                                  dictionary,
                                  args.data_root,
                                  question_len=12,
                                  clip=True)
    eval_loader = DataLoader(eval_dset,
                             args.batch_size,
                             shuffle=False,
                             num_workers=2)

    label2ans = eval_dset.label2ans  # ans2label = {'no': 0, 'yes': 1, diseases...}

    cls_net.eval()
    ques_net.eval()
    img_net.eval()

    score = 0
    # NOTE(review): assumes the first 28 questions of the split are the
    # closed-ended ones — confirm against the dataset ordering.
    closed_ques_num = 28
    closed_score = 0
    cnt = 0

    out_dir = os.path.join(args.out_dir, args.split, args.model_name)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    pred_file = open(out_dir + '/prediction.txt', 'w')
    ques_pred_file = open(out_dir + '/question_type_prediction.txt', 'w')
    # Per-answer-class totals and hits.
    #   NOTE(review): 332 presumably = 2 yes/no labels + 330 disease
    #   classes — confirm against the answer vocabulary.
    all_list = [0 for i in range(332)]
    true_list = [0 for i in range(332)]

    csc = 0       # correct open-ended (disease) answers
    csctotal = 0  # high-confidence disease predictions taken directly
    with torch.no_grad():
        for v, q, a, ans_type, q_types, image_name in tqdm(iter(eval_loader)):
            v, q, a = v.cuda(), q.cuda(), a.cuda()
            v = v.reshape(v.shape[0], 3, 224, 224)
            q_prob = ques_net(q)  # 1 x ques_num_classes
            q_prob = q_prob[
                0]  # [0: closed-ended-normal, 1: closed-ended-abnormal 2: open-ended]
            q_type = torch.argmax(q_prob)

            v_prob, feature = img_net(v)  # 1 x img_num_classes

            if q_type == 0:
                # Closed "is it normal?" question: answer yes (label 1)
                # iff the image is classified normal.
                normal_prob2 = softmax(cls_net(v)[0])[0].item()
                abnormal_prob = 1 - normal_prob2
                pred = 0 if abnormal_prob > normal_prob2 else 1

            elif q_type == 1:
                # Closed "is it abnormal?" question: answer yes (label 1)
                # iff the image is classified abnormal.
                normal_prob2 = softmax(cls_net(v)[0])[0].item()
                abnormal_prob = 1 - normal_prob2
                pred = 1 if abnormal_prob > normal_prob2 else 0
            else:
                # Open-ended question: predict a disease label; the +2
                # offsets past the two yes/no labels (see label2ans above).
                disease_prob = softmax(v_prob)
                prob1, pred_idx = torch.topk(v_prob, 5, dim=-1)
                prob1 = softmax(prob1[0])

                if prob1[0] > 0.6:
                    # Confident top-1 prediction: take it directly.
                    csctotal += 1
                    pred = torch.argmax(disease_prob) + 2
                else:
                    # Low confidence: re-rank the top-5 classes by the best
                    # cosine similarity between the image feature and the
                    # stored per-class reference features.
                    pred_idx = pred_idx.cpu().numpy().tolist()[0]
                    p_idx = []
                    for i in pred_idx:
                        p_list = []
                        for fdict in fd[str(i + 2)]:
                            fdict = np.asarray(fdict)
                            fdict = torch.from_numpy(fdict)
                            cs = torch.cosine_similarity(feature.cpu(),
                                                         fdict,
                                                         dim=1)
                            p_list.append(cs.item())
                        p_idx.append(max(p_list))
                    pred = pred_idx[p_idx.index(max(p_idx))] + 2

            if args.split != 'test':
                # Ground truth available: update accuracy bookkeeping.
                gt = torch.argmax(a[0])
                all_list[gt.item()] += 1
                if pred == gt:
                    if pred > 1:
                        csc += 1
                    true_list[gt.item()] += 1
                    score += 1
                    if cnt < closed_ques_num:
                        closed_score += 1

            # Record per-image answer and question-type predictions.
            img_name = image_name[0]
            pred_ans = label2ans[pred]
            pred_file.write(img_name[:-4] + '|' + pred_ans + '\n')
            ques_pred_file.write(img_name[:-4] + '|' +
                                 str(q_type.cpu().numpy()) + '\n')
            cnt += 1

        if args.split != 'test':
            # Convert raw counts into percentage scores and persist them.
            open_score = score - closed_score
            score = (score * 100.0) / cnt
            open_score = (open_score * 100.0) / (cnt - closed_ques_num)
            closed_score = (closed_score * 100.0) / closed_ques_num

            file = open(out_dir + '/score.txt', 'w')
            file.write('score: %.4f\n' % (score))
            file.write('closed score: %.4f\n' % (closed_score))
            file.write('open score: %.4f\n' % (open_score))
            print(csc / csctotal)
            print('score: %.4f' % (score))