# ---- Example #1 ----
def main_inference():
    """Run inference with a pretrained TVQANet model and dump predictions to JSON.

    NOTE(review): relies on a module-level ``paths`` dict (not visible in this
    chunk) for the dataset and the pretrained checkpoint — confirm it exists.
    """
    print("Loading config...")
    opt = BaseOptions().parse()
    print("Loading dataset...")
    dset = TVQADataset(opt, paths)
    print("Loading model...")
    model = TVQANet(opt)

    # Pick GPU 0 unless the user forced CPU (opt.device == '-2') or CUDA is absent.
    device = torch.device("cuda:0" if opt.device != '-2'
                          and torch.cuda.is_available() else "cpu")

    # if specified, use opt.device else use the better of what's available (gpu > cpu)
    #model.to(opt.device if opt.device != '-2' else device)
    # NOTE(review): `device` is computed but never applied because the `.to(...)`
    # call above is commented out — as written the model is never moved.

    cudnn.benchmark = True

    # load pre-trained model if it exists
    loadPreTrainedModel(model=model, modelPath=paths["pretrained_model"])

    # Inference-only mode: freeze eval behavior and disable autograd globally.
    model.eval()
    model.inference_mode = True
    torch.set_grad_enabled(False)
    print("Evaluation Starts:\n")
    predictions = inference(opt, dset, model)
    print("predictions {}".format(predictions.keys()))
    # Save next to the checkpoint, named after the evaluated split (opt.mode).
    pred_path = paths["pretrained_model"].replace(
        "best_valid.pth", "{}_inference_predictions.json".format(opt.mode))
    save_json(predictions, pred_path)
def main():
    """Train TVQANet with early stopping and a one-shot LR decay.

    Seeds torch/numpy and forces deterministic cuDNN kernels for
    reproducibility. Returns the run name (second path component of
    ``opt.results_dir``).
    """
    opt = BaseOptions().parse()
    torch.manual_seed(opt.seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    np.random.seed(opt.seed)

    dset = TVQADataset(opt)
    opt.vocab_size = len(dset.word2idx)

    model = TVQANet(opt)

    if opt.device.type == "cuda":
        print("CUDA enabled.")
        if len(opt.device_ids) > 1:
            print("Use multi GPU", opt.device_ids)
            # Gather DataParallel outputs on GPU 0.
            model = torch.nn.DataParallel(model, device_ids=opt.device_ids, output_device=0)
        model.to(opt.device)

    # model.load_state_dict(torch.load("./path/best_release_7420.pth"))

    # Summed (not averaged) cross-entropy over the batch.
    criterion = nn.CrossEntropyLoss(reduction="sum").to(opt.device)

    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=opt.lr,
        weight_decay=opt.wd)

    # One-shot step decay: once LR_DECAY_EPOCH is reached, the learning rate
    # is set to DECAYED_LR (previously hard-coded magic numbers).
    LR_DECAY_EPOCH = 10
    DECAYED_LR = 0.0002

    best_acc = 0.
    start_epoch = 0
    early_stopping_cnt = 0
    early_stopping_flag = False

    for epoch in range(start_epoch, opt.n_epoch):
        if not early_stopping_flag:
            cur_acc = train(opt, dset, model, criterion, optimizer, epoch, best_acc)

            # Stop after opt.max_es_cnt *consecutive* epochs without a new best.
            is_best = cur_acc > best_acc
            best_acc = max(cur_acc, best_acc)
            if not is_best:
                early_stopping_cnt += 1
                if early_stopping_cnt >= opt.max_es_cnt:
                    early_stopping_flag = True
            else:
                early_stopping_cnt = 0
        else:
            print("=> early stop with valid acc %.4f" % best_acc)
            break

        if epoch == LR_DECAY_EPOCH:
            for g in optimizer.param_groups:
                g['lr'] = DECAYED_LR

    return opt.results_dir.split("/")[1]
# ---- Example #3 ----
def pretrain(cfg):
    """Pretrain the ABC model on TVQA with early stopping.

    Logs scalars to a SummaryWriter under ``cfg.log_dir``; stops after
    MAX_ES_CNT consecutive epochs without a new best validation accuracy.
    """
    set_seed(cfg.seed)
    writer = SummaryWriter(cfg.log_dir)
    cfg.writer = writer

    dset = TVQADataset(cfg)
    # dset.load_vid()
    cfg.vocab_size = len(dset.word2idx)
    model = ABC(cfg)
    if not cfg.no_glove:
        # Initialize word embeddings from pretrained GloVe vectors.
        model.load_embedding(dset.vocab_embedding)

    model.cuda()
    cudnn.benchmark = True
    # FIX: size_average=False is deprecated (removed in modern PyTorch);
    # reduction="sum" is the exact equivalent.
    criterion = nn.CrossEntropyLoss(reduction="sum").cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=cfg.lr,
                                 weight_decay=cfg.wd)

    MAX_ES_CNT = 3  # epochs without improvement before stopping

    best_acc = 0.
    early_stopping_cnt = 0
    early_stopping_flag = False
    for epoch in range(cfg.n_epoch):
        if early_stopping_flag:
            print("early stopping with valid acc %.4f" % best_acc)
            break
        cur_acc = train(cfg, dset, model, criterion, optimizer, epoch,
                        best_acc)

        # remember best acc
        is_best = cur_acc > best_acc
        best_acc = max(cur_acc, best_acc)
        if not is_best:
            early_stopping_cnt += 1
            if early_stopping_cnt >= MAX_ES_CNT:
                early_stopping_flag = True
        else:
            # FIX: reset the counter on improvement so stopping requires
            # *consecutive* bad epochs (consistent with the other mains here).
            early_stopping_cnt = 0

    # FIX: export and close the writer on normal completion too — previously
    # this only happened on the early-stop path, leaking the writer otherwise.
    cfg.writer.export_scalars_to_json(
        os.path.join(cfg.log_dir, "all_scalars.json"))
    cfg.writer.close()
# ---- Example #4 ----
def main_inference():
    """Load a pretrained STAGE checkpoint, run inference, and save predictions."""
    print("Loading config...")
    opt = TestOptions().parse()

    print("Loading dataset...")
    dset = TVQADataset(opt, mode=opt.mode)

    print("Loading model...")
    model = STAGE(opt)
    model.to(opt.device)
    cudnn.benchmark = True

    # Restore the best validation checkpoint; --no_strict relaxes key matching.
    model_path = os.path.join("results", opt.model_dir, "best_valid.pth")
    model.load_state_dict(torch.load(model_path), strict=not opt.no_strict)

    # Inference-only mode: eval behavior, no gradient tracking.
    model.eval()
    model.inference_mode = True
    torch.set_grad_enabled(False)

    print("Evaluation Starts:\n")
    predictions = inference(opt, dset, model)
    print("predictions {}".format(predictions.keys()))

    # Write the JSON next to the checkpoint, named after the evaluated split.
    pred_path = model_path.replace(
        "best_valid.pth", "{}_inference_predictions.json".format(opt.mode))
    save_json(predictions, pred_path)
# ---- Example #5 (File: main.py, Project: sunutf/TVQA) ----
        if opt.debug:
            break

    valid_acc = sum(valid_corrects) / float(len(valid_corrects))
    valid_loss = sum(valid_loss) / float(len(valid_corrects))
    return valid_acc, valid_loss


if __name__ == "__main__":
    # Fixed seed for reproducibility.
    torch.manual_seed(2018)
    opt = BaseOptions().parse()
    writer = SummaryWriter(opt.results_dir)
    opt.writer = writer

    dset = TVQADataset(opt)
    opt.vocab_size = len(dset.word2idx)
    model = ABC(opt)
    if not opt.no_glove:
        # Initialize word embeddings from pretrained GloVe vectors.
        model.load_embedding(dset.vocab_embedding)

    model.to(opt.device)
    cudnn.benchmark = True
    # FIX: size_average=False is deprecated (removed in modern PyTorch);
    # reduction="sum" is the exact equivalent.
    criterion = nn.CrossEntropyLoss(reduction="sum").to(opt.device)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=opt.lr,
                                 weight_decay=opt.wd)

    best_acc = 0.
    early_stopping_cnt = 0
# ---- Example #6 ----
def main():
    """Train STAGE on TVQA with LR plateau scheduling and early stopping.

    Returns ``(run_name, debug_flag)`` where run_name is the second path
    component of ``opt.results_dir``.
    """
    opt = BaseOptions().parse()
    # Reproducibility: fixed seeds plus deterministic cuDNN kernels.
    torch.manual_seed(opt.seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    np.random.seed(opt.seed)

    writer = SummaryWriter(opt.results_dir)
    opt.writer = writer
    dset = TVQADataset(opt)
    opt.vocab_size = len(dset.word2idx)
    model = STAGE(opt)

    count_parameters(model)

    if opt.device.type == "cuda":
        print("CUDA enabled.")
        model.to(opt.device)
        if len(opt.device_ids) > 1:
            print("Use multi GPU", opt.device_ids)
            model = torch.nn.DataParallel(
                model, device_ids=opt.device_ids)  # use multi GPU

    # Summed (not averaged) cross-entropy over the batch.
    criterion = nn.CrossEntropyLoss(reduction="sum").to(opt.device)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=opt.lr,
                                 weight_decay=opt.wd)
    # Halve the LR when validation accuracy plateaus for 10 epochs.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.5,
                                                           patience=10,
                                                           verbose=True)

    best_acc = 0.
    start_epoch = 0
    early_stopping_cnt = 0
    early_stopping_flag = False
    for epoch in range(start_epoch, opt.n_epoch):
        if not early_stopping_flag:
            use_hard_negatives = epoch + 1 > opt.hard_negative_start  # whether to use hard negative sampling
            # Approximate global iteration count; used as the TensorBoard x-axis.
            niter = epoch * np.ceil(len(dset) / float(opt.bsz))
            opt.writer.add_scalar("learning_rate",
                                  float(optimizer.param_groups[0]["lr"]),
                                  niter)
            cur_acc = train(opt,
                            dset,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            best_acc,
                            use_hard_negatives=use_hard_negatives)
            scheduler.step(cur_acc)  # decrease lr when acc is not improving
            # remember best acc; stop after opt.max_es_cnt consecutive non-best epochs
            is_best = cur_acc > best_acc
            best_acc = max(cur_acc, best_acc)
            if not is_best:
                early_stopping_cnt += 1
                if early_stopping_cnt >= opt.max_es_cnt:
                    early_stopping_flag = True
            else:
                early_stopping_cnt = 0
        else:
            print("=> early stop with valid acc %.4f" % best_acc)
            # Flush TensorBoard scalars before exiting the loop.
            opt.writer.export_scalars_to_json(
                os.path.join(opt.results_dir, "all_scalars.json"))
            opt.writer.close()
            break  # early stop break

        if opt.debug:
            break

    return opt.results_dir.split("/")[1], opt.debug
# ---- Example #7 ----
def visu(cfg):
    """Interactively visualize memory/QA behavior of a trained model checkpoint.

    Loads the checkpoint at ``cfg.ckpt``, steps through random validation
    samples, and prints memory contents / attention at each step, pausing for
    keyboard input. Runs forever (``while True``) until interrupted.
    """
    # os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['CUDA_VISIBLE_DEVICES'] = ''

    set_seed(cfg.seed)
    # Preserve the CLI values that the checkpoint's saved cfg would clobber.
    mn = cfg.memory_num
    demo = cfg.demo
    large = cfg.large
    if not os.path.exists(cfg.ckpt):
        print('Invalid ckpt path:', cfg.ckpt)
        exit(1)
    ckpt = torch.load(cfg.ckpt, map_location=lambda storage, loc: storage)
    print(cfg.ckpt, 'loaded')
    cfg.__dict__.update(ckpt['cfg'].__dict__)
    pprint(cfg.__dict__)
    # Restore the CLI overrides on top of the loaded config.
    cfg.memory_num = mn
    cfg.large = large
    cfg.demo = demo
    dset = TVQADataset(cfg)
    cfg.vocab_size = len(dset.word2idx)

    model = create_model(cfg)

    model.load_state_dict(ckpt['model'])
    model.cuda()

    dset.set_mode("valid")
    env = Environment(cfg, 'test', dset, shuffle=False)
    env.set_model(model)
    env.set_gpu_id(torch.cuda.current_device())

    # NOTE(review): f1_score / exact_match / criterion / accs are created but
    # never updated below — `accs` stays empty, so the final mean-accuracy
    # print would divide by zero if this loop ever terminated.
    f1_score = F1score()
    exact_match = ExactMatch()
    criterion = nn.CrossEntropyLoss()
    accs = []

    while True:
        # Visualize one randomly chosen validation sample per outer iteration.
        data_idx = np.random.randint(len(dset))
        model.eval()
        env.reset(data_idx)

        print('-'*80)
        print('Data ID:', env.data_idx)
        print()

        input('\nPress enter to continue\n')

        train_data, solvable = env.observe()
        # For visualizing frame. Download frames data for this code and set path
        pic_path = '/st2/mshan/preprocessing/data/tv_qa/tv_qa_uncompressed/frames_hq'
        env.print_memory(pic_path)
        # 'a' is a placeholder; typing 'c' at the prompt below skips pauses.
        x = 'a'
        for i, entry in enumerate(train_data):
            # No batch. Compute one entry at once
            if entry.data is None:
                # Null entry...
                # But if there is none entry, can reach here if there is some None entry
                continue
            if entry.feature is None:
                # Do Mem forward and compute features (if feature is not computed yet)
                # 1 x 768(hidden_dim)

                # Video feature
                vid_feat = entry.data[0].cuda()
                # Sub feature
                if entry.data[1] is None:
                    # No subtitle: use a zero vector of the subtitle-feature size.
                    sub_feat = torch.zeros(1, cfg.hidden_size, dtype=torch.float).cuda()
                else:
                    if cfg.model == "LRU_DNTM":
                        # The last memory slot holds the question; embed it differently.
                        if i == cfg.memory_num-1:
                            sub_feat = model.q_embedding(torch.LongTensor(entry.data[1]).cuda())
                        else:
                            sub_feat = model.sub_embedding(torch.LongTensor(entry.data[1]).cuda())
                    else:
                        sub_feat = model.sub_embedding(torch.LongTensor(entry.data[1]).cuda())

                entry.feature = (vid_feat, sub_feat)

            if entry.hidden is None:
                # Initialize the per-entry temporal hidden state (size depends on model).
                if cfg.model == "LRU":
                    entry.hidden = torch.zeros(1, dtype=torch.float).cuda()
                else:
                    entry.hidden = torch.zeros(1, cfg.hidden_size * 2, dtype=torch.float).cuda()

        while not env.is_done():
            if x != 'c':
                x = input('\nPress c to skip through\n')

            # Only the newest (last) entry may still lack feature/hidden here.
            entry = train_data[-1]
            if entry.data is None:
                # Null entry...
                # But if there is none entry, cannot reach here! (Because it will stuck on while condition)
                assert False
            if entry.feature is None:
                # Do Mem forward and compute features (if feature is not computed yet)
                # Feature is going to embedding
                # 1 x 768(hidden_dim)
                vid_feat = entry.data[0].cuda()
                # Sub feature
                if entry.data[1] is None:
                    sub_feat = torch.zeros(1, cfg.hidden_size, dtype=torch.float).cuda()
                else:
                    if cfg.model == "LRU_DNTM":
                        sub_feat = model.q_embedding(torch.LongTensor(entry.data[1]).cuda())
                        # Re-embed the previous entry with the subtitle embedding,
                        # since it is no longer the question slot.
                        if train_data[-2].data[1] is not None:
                            train_data[-2].feature[1] = model.sub_embedding(torch.LongTensor(train_data[-2].data[1]).cuda())
                    else:
                        sub_feat = model.sub_embedding(torch.LongTensor(entry.data[1]).cuda())

                entry.feature = (vid_feat, sub_feat)

            if entry.hidden is None:
                if cfg.model == "LRU_DNTM":
                    entry.hidden = torch.zeros(1, dtype=torch.float).cuda()
                else:
                    entry.hidden = torch.zeros(1, cfg.hidden_size * 2, dtype=torch.float).cuda()

            # At here, all memory entries have feature (for spatial transformer) and hidden (for temporal GRU)
            # Stack

            modelargs = []
            input_mask = torch.ones([cfg.memory_num], dtype=torch.long).unsqueeze(0).cuda()
            vid_feature = torch.stack([entry.feature[0] for entry in train_data], 0).unsqueeze(0)
            sub_feature = torch.stack([entry.feature[1] for entry in train_data], 1)
            temporal_hidden = torch.stack([entry.hidden for entry in train_data], 1)

            modelargs.append(vid_feature)
            modelargs.append(sub_feature)
            modelargs.append(temporal_hidden)
            modelargs.append(input_mask)

            with torch.no_grad():
                # T_LRU additionally returns attention weights for visualization.
                if cfg.model == "T_LRU":
                    logit, value, temporal_hidden, att = model.mem_forward(*modelargs)
                else:
                    logit, value, temporal_hidden = model.mem_forward(*modelargs)
                    att = None

            # Reassigning temporal hidden
            if cfg.model == "LRU_DNTM":
                for i, entry in enumerate(train_data):
                    if i == cfg.memory_num - 1:
                        # Question slot: keep its hidden state zeroed.
                        entry.hidden = torch.zeros(1, dtype=torch.float).cuda()
                    else:
                        entry.hidden = temporal_hidden[:, i]

            # Greedy action selection (sampling variant left commented out).
            prob = F.softmax(logit, 1)
            log_prob = F.log_softmax(logit, 1)
            entropy = -(log_prob * prob).sum(1, keepdim=True)
            #action = prob.multinomial(num_samples=1)
            _, action = prob.max(1, keepdim=True)
            log_prob = log_prob.gather(1, action)

            if x != 'c':
                env.print_memory(pic_path, prob, att=att)

            env.step(action.item())
            env.step_append()

            train_data, solvable = env.observe()

        # Episode done: answer the QA with the final memory contents.
        model_in_list, targets, _ = env.qa_construct(0)
        with torch.no_grad():
            outputs = model(*model_in_list)
        if outputs.max(0)[1].item() == targets.item():
            acc = 1
        else :
            acc = 0

        env.print_memory(pic_path, answer_set=(outputs.max(0)[1].item(), targets.item()))

        print("=== QA Result ===")
        print("Prediction: %s" % outputs.max(0)[1].item())
        print("Truth: %s" % targets.item())
        print("Accuracy: %.2f" % acc)

    # NOTE(review): unreachable — the `while True` above never breaks.
    print("Total mean accuracy : %.2f" % (sum(accs) / len(accs)))
    print("Test instance amount : %d" % (len(accs)))
# ---- Example #8 ----
def test(cfg):
    """Evaluate a checkpoint with parallel TestWorkers and print accuracy stats.

    Loads the checkpoint at ``cfg.ckpt`` (merging its saved config into
    ``cfg`` while preserving CLI-provided ``memory_num``/``large``), runs
    ``cfg.num_workers`` worker processes over the validation split, then
    aggregates per-sample results from a multiprocessing queue.
    """
    # os.environ['OMP_NUM_THREADS'] = '1'
    # os.environ['CUDA_VISIBLE_DEVICES'] = ''

    set_seed(cfg.seed)
    # Preserve CLI values that the checkpoint's saved config would clobber.
    mn = cfg.memory_num
    large = cfg.large
    if not os.path.exists(cfg.ckpt):
        print('Invalid ckpt path:', cfg.ckpt)
        exit(1)
    ckpt = torch.load(cfg.ckpt, map_location=lambda storage, loc: storage)
    print(cfg.ckpt, 'loaded')
    loaded_cfg = ckpt['cfg'].__dict__
    # Keep the current num_workers rather than the one saved at train time.
    if loaded_cfg.get('num_workers') is not None:
        del loaded_cfg['num_workers']
    cfg.__dict__.update(loaded_cfg)
    cfg.model = cfg.model.upper()
    pprint(cfg.__dict__)
    cfg.memory_num = mn
    cfg.large = large

    dset = TVQADataset(cfg)
    cfg.vocab_size = len(dset.word2idx)

    model = create_model(cfg)

    model.load_state_dict(ckpt['model'])

    dset.set_mode("valid")
    print(len(dset))
    set_length = len(dset)
    env = Environment(cfg, 'test', dset, shuffle=False)
    env.set_model(model)
    env.set_gpu_id(torch.cuda.current_device())

    queue = mp.Queue()

    procs = []
    for i in range(cfg.num_workers):
        p = TestWorker(cfg, i, model, env, queue)
        p.start()
        procs.append(p)

    # Drain the queue while each worker runs: a child process cannot exit
    # cleanly while its queued items remain unread, so read until the worker
    # is dead AND the queue is empty.
    results = []
    for p in procs:
        while True:
            running = p.is_alive()
            if not queue.empty():
                result = queue.get()
                results.append(result)
            else:
                if not running:
                    break

    for p in procs:
        p.join()

    print('Processing duplicated factors')
    # FIX: removed a leftover `import pdb; pdb.set_trace()` debugging
    # breakpoint that halted every run at this point.
    acc = 0
    total_length = 0
    solvable = 0
    solvable_success = 0
    solvable_fail = 0

    solve_binary = 0
    solve_binary_success = 0
    solve_binary_fail = 0

    # Aggregate per-worker counters, splitting the "supporting frame" stats
    # by whether the final answer was correct.
    for i in range(len(results)):
        acc += results[i]['acc']
        total_length += results[i]['dataset_cnt']
        solvable += results[i]['solvable']
        solve_binary += results[i]['solve_binary']

        if results[i]['acc'] == 1:
            solvable_success += results[i]['solvable']
        else:
            solvable_fail += results[i]['solvable']

        if results[i]['acc'] == 1:
            solve_binary_success += results[i]['solve_binary']
        else:
            solve_binary_fail += results[i]['solve_binary']

    # answer_dict = {}
    # for i in range(len(results)):
    #     qa_id = results[i]['qa_id']
    #     pred = results[i]['pred']
    #     answer_dict[str(qa_id)] = pred
    #
    # import json
    # with open('prediction_valid.json', 'w') as fp:
    #     json.dump(answer_dict, fp, sort_keys=True)

    print("Dump output to json")

    # Grace period so late worker output doesn't interleave with the summary.
    from time import sleep
    sleep(3)
    print('All processes is finished.')
    print('Solvable: %.2f' % (solvable / total_length * 100))
    print('Solve binary (whether at least 1 sf or not) : %.2f' %
          (solve_binary / total_length * 100))
    print('Accuracy: %.2f' % (acc / total_length * 100))

    # NOTE(review): the two ratios below divide by `acc` / `total_length - acc`
    # and will raise ZeroDivisionError at 0% or 100% accuracy.
    print('Rate of supporting frames in memory when Success: %.2f' %
          (solvable_success / acc * 100))
    print('Rate of supporting frames in memory when Fail: %.2f' %
          (solvable_fail / (total_length - acc) * 100))

    print('Rate of memory including at least one sf when Success: %.2f' %
          (solve_binary_success / acc * 100))
    print('Rate of memory including at least one sf when Fail: %.2f' %
          (solve_binary_fail / (total_length - acc) * 100))
# ---- Example #9 ----
        cur_qid2targets = {qid:  target for qid, target in zip(qids, targets)}
        qid2targets = merge_two_dicts(qid2targets, cur_qid2targets)
    return qid2preds, qid2targets


def get_acc_from_qid_dicts(qid2preds, qid2targets):
    """Return the fraction of question ids whose prediction equals its target."""
    qid_list = list(qid2preds.keys())
    pred_arr = np.asarray([int(qid2preds[qid]) for qid in qid_list])
    target_arr = np.asarray([int(qid2targets[qid]) for qid in qid_list])
    return sum(pred_arr == target_arr) / float(len(pred_arr))


if __name__ == "__main__":
    # Evaluation entry point: build the dataset/model, restore the best
    # checkpoint, and run the test() loop over the chosen split.
    opt = TestOptions().parse()
    dset = TVQADataset(opt)
    opt.vocab_size = len(dset.word2idx)
    model = ABC(opt)

    # Optionally extend the vocabulary (and GloVe embeddings) with words
    # that appear only in the test split.
    if opt.new_word2idx_path_test:
        print("The vocabulary needs to be extended.")
        dset.extend_vocab(model, opt.new_word2idx_path_test, opt.glove_path)

    # Restore the best validation checkpoint for this run directory.
    model_path = os.path.join("results", opt.model_dir, "best_valid.pth")
    model.load_state_dict(torch.load(model_path))

    model.to(opt.device)
    cudnn.benchmark = True

    all_qid2preds, all_qid2targets = test(opt, dset, model)
# ---- Example #10 ----
def main(cfg):
    """Launch asynchronous (A3C-style) training with shared model/optimizer.

    Optionally resumes from ``cfg.ckpt`` (merging its saved config) and/or
    initializes a submodule from a pretraining checkpoint in
    ``cfg.pretrain_dir``. Spawns one ValidWorker, several TrainWorkers, and a
    TensorboardWorker communicating over a multiprocessing queue.
    """
    ckpt = None
    if cfg.ckpt:
        if not os.path.exists(cfg.ckpt):
            print('Invalid ckpt path:', cfg.ckpt)
            exit(1)
        # map_location keeps the load on CPU regardless of the saving device.
        ckpt = torch.load(cfg.ckpt, map_location=lambda storage, loc: storage)

        print(cfg.ckpt, 'loaded')
        loaded_cfg = ckpt['cfg'].__dict__

        # Keep the current run's values for these keys rather than the saved ones.
        del loaded_cfg['num_episodes']
        del loaded_cfg['num_workers']
        del loaded_cfg['prepro_dir']

        cfg.__dict__.update(loaded_cfg)
        cfg.model = cfg.model.upper()

        print('Merged Config')
        pprint(cfg.__dict__)

    else:
        os.makedirs(os.path.join(cfg.log_dir, 'ckpt'), exist_ok=True)

    prepro_ckpt = None
    if cfg.pretrain_dir is not None:
        if not os.path.exists(cfg.pretrain_dir):
            print('Invalid pretraining ckpt path:', cfg.pretrain_dir)
            exit(1)
        prepro_ckpt = torch.load(os.path.join(cfg.pretrain_dir,
                                              "best_valid.pth"),
                                 map_location=lambda storage, loc: storage)

    # Separate dataset instances for the train and valid environments.
    dset = TVQADataset(cfg)
    cfg.vocab_size = len(dset.word2idx)
    dset_valid = TVQADataset(cfg)

    # Prepare model
    shared_model = create_model(cfg)

    if ckpt is not None:
        shared_model.load_state_dict(ckpt['model'])

    # Load TVQA ABC part
    if prepro_ckpt is not None:
        # Non-strict partial load: walk the module tree and load whatever
        # keys match, collecting missing/unexpected keys for reporting
        # (mirrors torch.nn.Module.load_state_dict internals).
        missing_keys = []
        unexpected_keys = []
        error_msgs = []
        metadata = getattr(prepro_ckpt, '_metadata', None)
        prepro_ckpt = prepro_ckpt.copy()
        if metadata is not None:
            prepro_ckpt._metadata = metadata

        def load(module, prefix=''):
            # Recursively apply state-dict entries whose keys start with `prefix`.
            local_metadata = {} if metadata is None else metadata.get(
                prefix[:-1], {})
            module._load_from_state_dict(prepro_ckpt, prefix, local_metadata,
                                         True, missing_keys, unexpected_keys,
                                         error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')

        load(shared_model, prefix='')
        print("Weights of {} not initialized from pretrained model: {}".format(
            shared_model.__class__.__name__, missing_keys))
        print("Weights from pretrained model not used in {}: {}".format(
            shared_model.__class__.__name__, unexpected_keys))
        # shared_model.load_state_dict(prepro_ckpt)

    # Place parameters in shared memory so worker processes update one model.
    shared_model.share_memory()

    optim = SharedAdam(filter(lambda p: p.requires_grad,
                              shared_model.parameters()),
                       lr=cfg.lr)

    if ckpt is not None:
        optim.load_state_dict(ckpt['optim'])

    optim.share_memory()

    set_seed(cfg.seed)
    dset.set_mode("train")
    train_env = Environment(cfg, 'train', dset, shuffle=True)
    dset_valid.set_mode("valid")
    valid_env = Environment(cfg, 'valid', dset_valid, shuffle=False)

    # Shared coordination state: done flag, global step, and a log queue.
    done = mp.Value('i', False)
    if ckpt is not None:
        gstep = mp.Value('i', ckpt['step'])
    else:
        gstep = mp.Value('i', 0)
    queue = mp.Queue()

    if cfg.debug:
        # Debug: run a single worker in-process (no fork) for easier tracing.
        procs = []
        p = ValidWorker(cfg, len(procs), done, shared_model, optim, valid_env,
                        gstep)
        # p = TrainWorker(cfg, len(procs), done, shared_model, optim, train_env, queue, gstep)
        p.run()
    else:
        procs = []
        p = ValidWorker(cfg, len(procs), done, shared_model, optim, valid_env,
                        gstep)
        p.start()
        procs.append(p)

        # num_workers - 1 trainers (the validator occupies one worker slot).
        for _ in range(cfg.num_workers - 1):
            p = TrainWorker(cfg, len(procs), done, shared_model, optim,
                            train_env, queue, gstep)
            p.start()
            procs.append(p)

        p = TensorboardWorker(cfg, len(procs), queue, done, gstep)
        p.start()
        procs.append(p)

        for p in procs:
            p.join()
        print('All processes is finished:', cfg.log_dir)