Example #1
def build_dataloader(dataset, collate_fn, is_train, opts):
    batch_size = opts.train_batch_size if is_train else opts.val_batch_size
    if is_train:
        sampler = TokenBucketSampler(
            dataset.lens,
            bucket_size=BUCKET_SIZE,
            batch_size=batch_size,
            droplast=is_train,
        )
        dataloader = DataLoader(
            dataset,
            batch_sampler=sampler,
            num_workers=opts.n_workers,
            pin_memory=opts.pin_mem,
            collate_fn=collate_fn,
        )
    else:
        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=opts.n_workers,
            shuffle=False,
            pin_memory=opts.pin_mem,
            collate_fn=collate_fn,
        )
    dataloader = PrefetchLoader(dataloader)
    return dataloader
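A minimal usage sketch of the helper above (hypothetical option values; train_dset, collate_fn, TokenBucketSampler, PrefetchLoader, and BUCKET_SIZE are assumed to come from the surrounding project, with train_dset exposing per-example token lengths via .lens):

from types import SimpleNamespace

# Hypothetical values: with a token-bucket sampler, train_batch_size
# is a token budget per batch, not an example count.
opts = SimpleNamespace(train_batch_size=8192, val_batch_size=8192,
                       n_workers=4, pin_mem=True)
train_loader = build_dataloader(train_dset, collate_fn,
                                is_train=True, opts=opts)
for batch in train_loader:
    pass  # batches arrive already on GPU, prefetched by PrefetchLoader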
Example #2
def create_dataloader(img_path, txt_path, batch_size, is_train, dset_cls,
                      collate_fn, opts):
    img_db_type = "gt" if "coco_gt" in img_path else "det"
    conf_th = -1 if img_db_type == "gt" else opts.conf_th
    num_bb = 100 if img_db_type == "gt" else opts.num_bb
    img_db = DetectFeatLmdb(img_path, conf_th, opts.max_bb, opts.min_bb,
                            num_bb, opts.compressed_db)
    txt_db = ReTxtTokLmdb(txt_path, opts.max_txt_len if is_train else -1)
    if is_train:
        dset = dset_cls(txt_db, img_db)
    else:
        dset = dset_cls(txt_db, img_db, use_gt_feat=img_db_type == "gt")
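    # NOTE: this overrides the batch_size argument passed into the function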
    batch_size = (opts.train_batch_size if is_train else opts.val_batch_size)
    sampler = DistributedSampler(dset,
                                 num_replicas=hvd.size(),
                                 rank=hvd.rank(),
                                 shuffle=False)
    dataloader = DataLoader(dset,
                            sampler=sampler,
                            batch_size=batch_size,
                            num_workers=opts.n_workers,
                            pin_memory=opts.pin_mem,
                            collate_fn=collate_fn)
    dataloader = PrefetchLoader(dataloader)
    return dataloader
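For reference, DistributedSampler partitions the index space across ranks; with shuffle=False each rank walks a strided slice of the dataset. A self-contained illustration in plain PyTorch (no Horovod required):

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.distributed import DistributedSampler

dummy = TensorDataset(torch.arange(10))
for rank in range(2):
    sampler = DistributedSampler(dummy, num_replicas=2, rank=rank,
                                 shuffle=False)
    # rank 0 -> [0, 2, 4, 6, 8], rank 1 -> [1, 3, 5, 7, 9]
    print(rank, list(sampler))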
Example #3
def build_dataloader(dataset, batch_size, collate_fn, is_train, opts):
    loader = DataLoader(dataset, batch_size=batch_size,
                        num_workers=opts.n_workers,
                        pin_memory=opts.pin_mem,
                        collate_fn=collate_fn,
                        shuffle=is_train)
    return PrefetchLoader(loader)
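Every example here wraps the final loader in PrefetchLoader, which is not shown in these snippets. In the UNITER-family repos it overlaps host-to-device copies with compute by staging the next batch on a side CUDA stream. A simplified sketch of the idea, assuming batches are dicts of tensors:

import torch

class PrefetchLoader:
    """Simplified sketch: copies the next batch to GPU on a side CUDA
    stream while the current batch is being consumed."""

    def __init__(self, loader):
        self.loader = loader
        self.stream = torch.cuda.Stream()

    def __len__(self):
        return len(self.loader)

    def __iter__(self):
        it = iter(self.loader)
        self._preload(it)
        while self.batch is not None:
            # make the compute stream wait for the async copy
            torch.cuda.current_stream().wait_stream(self.stream)
            batch = self.batch
            self._preload(it)  # start copying the next batch
            yield batch

    def _preload(self, it):
        try:
            batch = next(it)
        except StopIteration:
            self.batch = None
            return
        with torch.cuda.stream(self.stream):
            self.batch = {k: v.cuda(non_blocking=True)
                          if torch.is_tensor(v) else v
                          for k, v in batch.items()}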
Example #4
def create_dataloader(opts, dataset_cls, collate_fn, mode='train'):
    assert mode in ['train', 'val', 'test']
    if mode == 'train':
        image_set = opts.train_image_set
        batch_size = opts.train_batch_size
    elif mode == 'val':
        image_set = opts.val_image_set
        batch_size = opts.val_batch_size
    else:
        image_set = opts.test_image_set
        batch_size = opts.val_batch_size

    dataset = dataset_cls(image_set,
                          opts.root_path,
                          opts.dataset_path,
                          use_img_type=opts.use_img_type,
                          test_mode=(mode == 'test'))
    sampler = DistributedTokenBucketSampler(hvd.size(),
                                            hvd.rank(),
                                            dataset.lens,
                                            bucket_size=BUCKET_SIZE,
                                            batch_size=batch_size,
                                            droplast=False,
                                            shuffle=(mode == 'train'))
    loader = DataLoader(dataset,
                        batch_sampler=sampler,
                        num_workers=opts.n_workers,
                        pin_memory=opts.pin_mem,
                        collate_fn=collate_fn)
    return PrefetchLoader(loader)
Example #5
def build_dataloader(dataset, collate_fn, is_train, opts):
    batch_size = opts.train_batch_size if is_train else 1
    dataloader = DataLoader(dataset, batch_size=batch_size,
                            shuffle=is_train, drop_last=is_train,
                            num_workers=opts.n_workers,
                            pin_memory=opts.pin_mem, collate_fn=collate_fn)
    dataloader = PrefetchLoader(dataloader)
    return dataloader
Example #6
def main(opts):
    hvd.init()
    device = torch.device("cuda")  # support single GPU only
    train_opts = Struct(json.load(open(f'{opts.train_dir}/log/hps.json')))

    if 'paired' in train_opts.model:
        EvalDatasetCls = Nlvr2PairedEvalDataset
        eval_collate_fn = nlvr2_paired_eval_collate
        if train_opts.model == 'paired':
            ModelCls = UniterForNlvr2Paired
        elif train_opts.model == 'paired-attn':
            ModelCls = UniterForNlvr2PairedAttn
        else:
            raise ValueError('unrecognized model type')
    elif train_opts.model == 'triplet':
        EvalDatasetCls = Nlvr2TripletEvalDataset
        ModelCls = UniterForNlvr2Triplet
        eval_collate_fn = nlvr2_triplet_eval_collate
    else:
        raise ValueError('unrecognized model type')

    img_db = DetectFeatLmdb(opts.img_db, train_opts.conf_th, train_opts.max_bb,
                            train_opts.min_bb, train_opts.num_bb,
                            opts.compressed_db)
    txt_db = TxtTokLmdb(opts.txt_db, -1)
    dset = EvalDatasetCls(txt_db, img_db, train_opts.use_img_type)
    batch_size = (train_opts.val_batch_size
                  if opts.batch_size is None else opts.batch_size)
    sampler = TokenBucketSampler(dset.lens,
                                 bucket_size=BUCKET_SIZE,
                                 batch_size=batch_size,
                                 droplast=False)
    eval_dataloader = DataLoader(dset,
                                 batch_sampler=sampler,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=eval_collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    # Prepare model
    ckpt_file = f'{opts.train_dir}/ckpt/model_step_{opts.ckpt}.pt'
    checkpoint = torch.load(ckpt_file)
    model_config = UniterConfig.from_json_file(
        f'{opts.train_dir}/log/model.json')
    model = ModelCls(model_config, img_dim=IMG_DIM)
    model.init_type_embedding()
    model.load_state_dict(checkpoint, strict=False)
    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    results = evaluate(model, eval_dataloader, device)
    # write results
    if not exists(opts.output_dir):
        os.makedirs(opts.output_dir)
    with open(f'{opts.output_dir}/results.csv', 'w') as f:
        for id_, ans in results:
            f.write(f'{id_},{ans}\n')
    print('all results written')
Example #7
def create_dataloaders(datasets, is_train, opts, all_img_dbs=None):
    if all_img_dbs is None:
        all_img_dbs = ImageLmdbGroup(opts.conf_th, opts.max_bb, opts.min_bb,
                                     opts.num_bb, opts.compressed_db)
    dataloaders = {}
    for dset in datasets:
        if is_train:
            txt_path = opts.train_txt_dbs
            img_path = opts.train_img_dbs
        else:
            txt_path = opts.val_txt_dbs
            img_path = opts.val_img_dbs

        for i, t in enumerate(dset['tasks']):
            task = f'{t}_{dset["name"]}'
            if is_train:
                LOGGER.info(f"Loading {task} train dataset "
                            f"{dset['db']}, {dset['img']}")
            else:
                LOGGER.info(f"Loading {task} validation dataset, "
                            f"{dset['db']}, {dset['img']}")

            if task.startswith('mlm'):
                dataset = build_mlm_dataset(txt_path, img_path, all_img_dbs,
                                            is_train, opts)
            elif task.startswith('mrfr'):
                dataset = build_mrfr_dataset(txt_path, img_path, all_img_dbs,
                                             is_train, opts)
            elif task.startswith('mrckl'):
                dataset = build_mrc_dataset(txt_path, img_path, all_img_dbs,
                                            is_train, opts)
            elif task.startswith('itm'):
                dataset = build_itm_dataset(txt_path, img_path, all_img_dbs,
                                            is_train, opts)
            elif task.startswith('itkm'):
                dataset = build_itkm_dataset(txt_path, img_path, all_img_dbs,
                                             is_train, opts)
            elif task.startswith('mkm'):
                dataset = build_mkm_dataset(txt_path, img_path, all_img_dbs,
                                            is_train, opts)
            else:
                raise ValueError(f'Undefined task {task}')

            LOGGER.info(f"{len(dataset[0])*hvd.size()} samples loaded")
            if task.startswith('itm'):
                # itm handles distributed training in dset not sampler
                loader = build_dataloader_itm(*dataset, is_train, opts)
            else:
                loader = build_dataloader(*dataset, is_train, opts)
            if is_train:
                ratio = dset['mix_ratio'][i]
                dataloaders[task] = (loader, ratio)
            else:
                dataloaders[task] = PrefetchLoader(loader)
    return dataloaders, all_img_dbs
Example #8
def create_dataloaders(datasets, is_train, opts, all_img_dbs=None):
    # opts.conf_th: 0.2
    # opts.min_bb: 10
    # opts.num_bb: 36
    if all_img_dbs is None:
        all_img_dbs = ImageLmdbGroup(opts.conf_th, opts.max_bb, opts.min_bb,
                                     opts.num_bb, opts.compressed_db)
    dataloaders = {}
    for dset in datasets:
        if is_train:
            assert len(dset['db']) == len(dset['img'])
            assert len(dset['tasks']) == len(dset['mix_ratio'])
            img_db = [all_img_dbs[path] for path in dset['img']]
        else:
            assert len(dset['db']) == len(dset['img']) == 1
            img_db = all_img_dbs[dset['img'][0]]

        for i, t in enumerate(dset['tasks']):
            task = f'{t}_{dset["name"]}'

            if is_train:
                LOGGER.info(f"Loading {task} train dataset "
                            f"{dset['db']}, {[img.img_dir for img in img_db]}")
                txt_db = [
                    TxtTokLmdb(path, opts.max_txt_len) for path in dset['db']
                ]
            else:
                LOGGER.info(f"Loading {task} validation dataset, "
                            f"{dset['db']}, {img_db.img_dir}")
                txt_db = TxtTokLmdb(dset['db'][0], -1)

            if task.startswith('mlm'):
                dataset = build_mlm_dataset(txt_db, img_db, is_train, opts)
            elif task.startswith('mrfr'):
                dataset = build_mrfr_dataset(txt_db, img_db, is_train, opts)
            elif task.startswith('mrc'):
                dataset = build_mrc_dataset(txt_db, img_db, is_train, opts)
            elif task.startswith('itm'):
                dataset = build_itm_dataset(txt_db, img_db, is_train, opts)
            else:
                raise ValueError(f'Undefined task {task}')

            LOGGER.info(f"{len(dataset[0])*hvd.size()} samples loaded")
            if task.startswith('itm'):
                # itm handles distributed training in dset not sampler
                loader = build_dataloader_itm(*dataset, is_train, opts)
            else:
                loader = build_dataloader(*dataset, is_train, opts)
            if is_train:
                ratio = dset['mix_ratio'][i]
                dataloaders[task] = (loader, ratio)
            else:
                dataloaders[task] = PrefetchLoader(loader)
    return dataloaders, all_img_dbs
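For training, the (loader, mix_ratio) pairs returned above are consumed by a task-mixing loader (MetaLoader in these repos) that picks which task supplies the next batch in proportion to mix_ratio. A minimal sketch of that sampling; the real MetaLoader also handles distributed synchronization and gradient-accumulation grouping:

import random

def meta_batches(dataloaders):
    """dataloaders: dict of task -> (loader, mix_ratio), as built above.
    Yields (task, batch), picking the next task with probability
    proportional to its mix_ratio."""
    tasks = list(dataloaders.keys())
    weights = [dataloaders[t][1] for t in tasks]
    iters = {t: iter(dataloaders[t][0]) for t in tasks}
    while True:
        task = random.choices(tasks, weights=weights, k=1)[0]
        try:
            batch = next(iters[task])
        except StopIteration:
            iters[task] = iter(dataloaders[task][0])  # restart the epoch
            batch = next(iters[task])
        yield task, batch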
Example #9
def create_dataloader(img_path, txt_path, batch_size, is_train,
                      dset_cls, collate_fn, opts):
    img_db = DetectFeatLmdb(img_path, opts.conf_th, opts.max_bb, opts.min_bb,
                            opts.num_bb, opts.compressed_db)
    txt_db = TxtTokLmdb(txt_path, opts.max_txt_len if is_train else -1)
    dset = dset_cls(txt_db, img_db, opts.use_img_type)
    sampler = TokenBucketSampler(dset.lens, bucket_size=BUCKET_SIZE,
                                 batch_size=batch_size, droplast=is_train)
    loader = DataLoader(dset, batch_sampler=sampler,
                        num_workers=opts.n_workers, pin_memory=opts.pin_mem,
                        collate_fn=collate_fn)
    return PrefetchLoader(loader)
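TokenBucketSampler is what makes batch_size in these helpers a token budget rather than an example count: indices are length-sorted within coarse buckets, then packed into batches until the padded token total would exceed the budget. A simplified sketch, close in spirit to the sampler these repos use:

import random

class TokenBucketSampler:
    """Simplified sketch: each batch holds roughly batch_size tokens."""

    def __init__(self, lens, bucket_size, batch_size, droplast=False):
        self.lens = lens              # per-example token lengths
        self.bucket_size = bucket_size
        self.batch_size = batch_size  # max padded tokens per batch
        self.droplast = droplast

    def __iter__(self):
        ids = list(range(len(self.lens)))
        random.shuffle(ids)
        # length-sort within coarse buckets so batches pad uniformly
        buckets = [sorted(ids[b:b + self.bucket_size],
                          key=lambda i: self.lens[i])
                   for b in range(0, len(ids), self.bucket_size)]
        batches, batch, max_len = [], [], 0
        for bucket in buckets:
            for i in bucket:
                max_len = max(max_len, self.lens[i])
                # padded size of the would-be batch: max_len * n_examples
                if batch and max_len * (len(batch) + 1) > self.batch_size:
                    batches.append(batch)
                    batch, max_len = [i], self.lens[i]
                else:
                    batch.append(i)
        if batch and not self.droplast:
            batches.append(batch)
        random.shuffle(batches)
        yield from batches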
Example #10
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb, opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero shot setting

    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
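Note the batch_size=1 on this DataLoader: ItmEvalDataset receives opts.batch_size at construction and yields pre-batched samples itself, so the outer loader only unwraps singletons. The pattern in miniature, with a hypothetical toy dataset:

from torch.utils.data import Dataset, DataLoader

class PreBatchedDataset(Dataset):
    """Each item is already a full batch; the DataLoader just unwraps."""

    def __init__(self, samples, batch_size):
        self.batches = [samples[i:i + batch_size]
                        for i in range(0, len(samples), batch_size)]

    def __len__(self):
        return len(self.batches)

    def __getitem__(self, i):
        return self.batches[i]

def unwrap_collate(items):
    return items[0]

loader = DataLoader(PreBatchedDataset(list(range(10)), batch_size=4),
                    batch_size=1, collate_fn=unwrap_collate)
print(list(loader))  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]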
Example #11
File: ivcml_data.py  Project: zhixinma/HERO
def build_dataloader(opts):
    # Load ground truth, query db and video db
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db, (opts.split, opts.query_txt_db)
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)

    eval_dataset = VcmrFullEvalDataset(video_ids,
                                       video_db,
                                       q_txt_db,
                                       distributed=model_opts.distributed_eval)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)
    return eval_dataloader
Example #12
def build_dataloader(dataset, collate_fn, is_train, opts):
    batch_size = (opts.train_batch_size if is_train else opts.val_batch_size)
    if is_train:
        train_sampler = WeightedRandomSampler(dataset.weights_by_class,
                                              len(dataset),
                                              replacement=True)
        dataloader = DataLoader(dataset,
                                sampler=train_sampler,
                                num_workers=opts.n_workers,
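                                # NOTE: hardcoded; ignores the batch_size
                                # computed above for the train branch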
                                batch_size=32,
                                pin_memory=opts.pin_mem,
                                collate_fn=collate_fn)
    else:
        sampler = TokenBucketSampler(dataset.lens,
                                     bucket_size=BUCKET_SIZE,
                                     batch_size=batch_size,
                                     droplast=is_train)
        dataloader = DataLoader(dataset,
                                batch_sampler=sampler,
                                num_workers=opts.n_workers,
                                pin_memory=opts.pin_mem,
                                collate_fn=collate_fn)
    dataloader = PrefetchLoader(dataloader)
    return dataloader
Example #13
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    opts.rank = rank

    
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))
    if opts.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, "
                         "should be >= 1".format(
                            opts.gradient_accumulation_steps))

    set_random_seed(opts.seed)

    if hvd.rank() == 0:
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        os.makedirs(join(opts.output_dir, 'ckpt'))
        save_training_meta(opts)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
        # store ITM predictions
        os.makedirs(join(opts.output_dir, 'results_val'))
        os.makedirs(join(opts.output_dir, 'results_test'))
        os.makedirs(join(opts.output_dir, 'results_train'))
    else:
        LOGGER.disabled = True
        model_saver = NoOp()

    # load DBs and image dirs
    all_img_dbs = ImageLmdbGroup(opts.conf_th, opts.max_bb, opts.min_bb,
                                 opts.num_bb, opts.compressed_db)
    # train
    train_dataset = MemeAIDataset(json_path='/home/data/meme_json/train.json',
                                  npz_folder='/home/data/faster_cnn_feature/',
                                  mode='train')
    train_loader = DataLoader(train_dataset,
                              batch_size=opts.train_batch_size,
                              shuffle=True,
                              num_workers=opts.n_workers,
                              collate_fn=collate_fn)
    train_loader = PrefetchLoader(train_loader)

    # val
    val_dataset = MemeAIDataset(json_path='/home/data/meme_json/dev.json',
                                npz_folder='/home/data/faster_cnn_feature/',
                                mode='val')
    val_loader = DataLoader(val_dataset,
                            batch_size=opts.inf_minibatch_size,
                            shuffle=False,
                            num_workers=opts.n_workers,
                            collate_fn=collate_fn)
    val_loader = PrefetchLoader(val_loader)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}

    model = Meme.from_pretrained(
        opts.model_config, state_dict=checkpoint,
        img_dim=IMG_DIM)
    model.init_output()  # pretrain ITM head is different from ranking head
    model.to(device)

    # make sure every process has same model parameters in the beginning
    # broadcast_tensors([p.data for p in model.parameters()], 0)
    # set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    model, optimizer = amp.initialize(model, optimizer,
                                      enabled=opts.fp16, opt_level='O2')

    global_step = 0
    # LOGGER.info(f"***** Running training on {n_gpu} GPUs *****")
    # LOGGER.info("  Num examples = %d", len(train_dataset) * hvd.size())
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    running_loss = RunningMeter('loss')
    model.train()

    n_examples = 0
    n_epoch = 0
    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    optimizer.step()
    for epoch in range(opts.epoch):
        print('epoch {}/ {}'.format(epoch, opts.epoch))
        pbar = tqdm(total=len(train_loader))

        model.train()
        preds = None
        gt = None

        for step, batch in enumerate(train_loader):
            x = batch[0]
            y = batch[1]
            n_examples += x['input_ids'].size(0)

            pred = model(x)

            if preds is None:
                preds = torch.sigmoid(pred)
                gt = y
            else:
                preds = torch.cat((preds, torch.sigmoid(pred)), dim=0)
                gt = torch.cat((gt, y), dim=0)

            # the logits version is numerically safer under fp16/amp than
            # BCE applied to sigmoid outputs
            loss = F.binary_cross_entropy_with_logits(pred, y)

            delay_unscale = (step+1) % opts.gradient_accumulation_steps != 0
            with amp.scale_loss(loss, optimizer, delay_unscale=delay_unscale
                                ) as scaled_loss:
                scaled_loss.backward()
                if not delay_unscale:
                    # gather gradients from every processes
                    # do this before unscaling to make sure every process uses
                    # the same gradient scale
                    grads = [p.grad.data for p in model.parameters()
                             if p.requires_grad and p.grad is not None]
                    all_reduce_and_rescale_tensors(grads, float(1))

            running_loss(loss.item())

            if (step + 1) % opts.gradient_accumulation_steps == 0:
                global_step += 1

                # learning rate scheduling
                lr_this_step = get_lr_sched(global_step, opts)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

                # log loss
                # NOTE: not gathered across GPUs for efficiency
                TB_LOGGER.add_scalar('loss', running_loss.val, global_step)
                TB_LOGGER.step()

                # update model params
                if opts.grad_norm != -1:
                    grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                                opts.grad_norm)
                    TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
                optimizer.step()
                optimizer.zero_grad()


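        # epoch-end flush: take one final optimizer step on whatever
        # gradients are still accumulated before computing metrics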
        global_step += 1

        # learning rate scheduling
        lr_this_step = get_lr_sched(global_step, opts)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_this_step
        TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

        # log loss
        # NOTE: not gathered across GPUs for efficiency
        TB_LOGGER.add_scalar('loss', running_loss.val, global_step)
        TB_LOGGER.step()

        # update model params
        if opts.grad_norm != -1:
            grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                        opts.grad_norm)
            TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
        optimizer.step()
        optimizer.zero_grad()

        with torch.no_grad():
            preds = preds.cpu().numpy().reshape(-1)
            gt = gt.cpu().numpy()
            roc = roc_auc_score(gt, preds)
            acc = accuracy_score(gt, np.around(preds))
        train_log = {'roc': roc, 'acc': acc}
        TB_LOGGER.log_scaler_dict({f"train/{k}": v
                                   for k, v in train_log.items()})


        val_log = validate(model, val_loader)
        TB_LOGGER.log_scaler_dict({f"valid/{k}": v for k, v in val_log.items()})

        LOGGER.info(train_log)
        LOGGER.info(val_log)

        model_saver.save(model, global_step)

        pbar.close()
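The delay_unscale handling above is the apex amp (O2) pattern for gradient accumulation: gradients are only unscaled, and with Horovod all-reduced, on the step that will actually call optimizer.step(). Stripped to its skeleton, assuming model, optimizer, loader, and accum_steps are set up as in the example:

import torch.nn.functional as F
from apex import amp

for step, (x, y) in enumerate(loader):
    loss = F.binary_cross_entropy_with_logits(model(x), y)
    delay_unscale = (step + 1) % accum_steps != 0
    with amp.scale_loss(loss, optimizer,
                        delay_unscale=delay_unscale) as scaled_loss:
        scaled_loss.backward()           # grads accumulate across steps
    if (step + 1) % accum_steps == 0:    # only on the boundary step
        optimizer.step()
        optimizer.zero_grad()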
Example #14
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QaQueryTokLmdb(opts.query_txt_db, -1)
    eval_dataset = ViolinEvalDataset(video_ids,
                                     video_db,
                                     q_txt_db,
                                     sampled_by_q=model_opts.sampled_by_q)
    collate_fn = violin_eval_collate

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForViolin.from_pretrained(model_config,
                                          state_dict=checkpoint,
                                          vfeat_dim=VFEAT_DIM,
                                          max_frm_seq_len=max_frm_seq_len)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=collate_fn)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results, logits = validate_violin(model, eval_dataloader, opts.split,
                                         opts.save_logits)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if opts.save_logits:
        result_dir += '_w_logit'
    if not exists(result_dir) and hvd.rank() == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        save_json(all_results,
                  f'{result_dir}/results_{opts.checkpoint}_all.json')
        LOGGER.info('All results written......')
        if opts.save_logits:
            save_pickle(all_logits,
                        f'{result_dir}/logits_{opts.checkpoint}_all.pkl')
            LOGGER.info('All logits written......')
Example #15
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    opts.n_gpu = n_gpu
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    set_random_seed(opts.seed)

    # train_examples = None
    LOGGER.info(f"Loading the whole video dataset {opts.sub_txt_db}, "
                f"{opts.vfeat_db}")
    video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                      opts.vfeat_interval, opts)

    # data loaders
    # train
    LOGGER.info(f"Loading the train QA dataset {opts.train_query_txt_db}")
    video_ids = get_video_ids(opts.train_query_txt_db)
    train_q_txt_db = QaQueryTokLmdb(opts.train_query_txt_db, opts.max_txt_len)
    train_dataloaders = build_downstream_dataloaders([opts.task],
                                                     video_db,
                                                     video_ids,
                                                     True,
                                                     opts,
                                                     q_txt_db=train_q_txt_db,
                                                     shuffle=True)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # val
    LOGGER.info(f"Loading the val QA dataset {opts.val_query_txt_db}")
    video_ids = get_video_ids(opts.val_query_txt_db)
    val_q_txt_db = QaQueryTokLmdb(opts.val_query_txt_db, -1)
    val_dataloaders = build_downstream_dataloaders([opts.task],
                                                   video_db,
                                                   video_ids,
                                                   False,
                                                   opts,
                                                   q_txt_db=val_q_txt_db)
    if opts.test_query_txt_db:
        LOGGER.info(f"Loading the test QA dataset {opts.test_query_txt_db}")
        video_ids = get_video_ids(opts.test_query_txt_db)
        test_q_txt_db = QaQueryTokLmdb(opts.test_query_txt_db, -1)
        test_dataloaders = build_downstream_dataloaders([opts.task],
                                                        video_db,
                                                        video_ids,
                                                        False,
                                                        opts,
                                                        q_txt_db=test_q_txt_db)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    model = HeroForVideoQA.from_pretrained(opts.model_config,
                                           state_dict=checkpoint,
                                           vfeat_dim=VFEAT_DIM,
                                           max_frm_seq_len=max_frm_seq_len)

    model.to(device)
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16,
                                      opt_level='O2')
    restorer = TrainingRestorer(opts, model, optimizer)
    global_step = restorer.global_step
    TB_LOGGER.global_step = global_step
    if hvd.rank() == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        if not exists(join(opts.output_dir, 'results')):
            # store tvqa predictions
            os.makedirs(join(opts.output_dir, 'results'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        LOGGER.disabled = True
        pbar = NoOp()
        model_saver = NoOp()
        restorer = NoOp()

    if global_step > 0:
        pbar.update(global_step)
    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    task2loss = {
        task: RunningMeter(f'loss/{task}')
        for task in train_dataloaders.keys()
    }

    for obj in (f'{opts.task}_qa', f'{opts.task}_st_ed'):
        task2loss[obj] = RunningMeter(f'loss/{obj}')

    model.train()
    n_examples = defaultdict(int)
    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    if global_step == 0:
        optimizer.step()
    for step, (task, batch) in enumerate(meta_loader):
        n_examples[task] += opts.train_batch_size

        loss = model(batch, task=task, compute_loss=True)

        loss_qa, loss_st_ed = loss
        loss = loss_qa + opts.lw_st_ed * loss_st_ed
        for n, ls in (('st_ed', loss_st_ed), ('qa', loss_qa)):
            ls = ls.item()
            task2loss[f'{task}_{n}'](ls)

        loss = loss.mean()
        task2loss[task](loss.item())

        delay_unscale = (step + 1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss,
                            optimizer,
                            delay_unscale=delay_unscale,
                            loss_id=task2scaler[task]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from every processes
                # do this before unscaling to make sure every process uses
                # the same gradient scale
                grads = [
                    p.grad.data for p in model.parameters()
                    if p.requires_grad and p.grad is not None
                ]
                all_reduce_and_rescale_tensors(grads, float(1))

        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1

            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for i, param_group in enumerate(optimizer.param_groups):
                if i == 0 or i == 1:
                    param_group['lr'] = lr_this_step * opts.lr_mul
                elif i == 2 or i == 3:
                    param_group['lr'] = lr_this_step
                else:
                    raise ValueError()
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            TB_LOGGER.log_scaler_dict({
                temp_loss.name: temp_loss.val
                for temp_loss in task2loss.values()
                if temp_loss.val is not None
            })
            TB_LOGGER.step()

            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            optimizer.step()
            optimizer.zero_grad()
            restorer.step()
            pbar.update(1)

            if global_step % 100 == 0:
                # monitor training throughput
                LOGGER.info('-------------------------------------------')
                LOGGER.info(f'Step {global_step}:')
                for t in train_dataloaders.keys():
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time() - start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)

            if global_step % opts.valid_steps == 0:
                LOGGER.info('===========================================')
                LOGGER.info(f"Step {global_step}: start running validation")
                validate(model,
                         val_dataloaders,
                         "val",
                         opts,
                         global_step=global_step)
                if opts.test_query_txt_db:
                    validate(model,
                             test_dataloaders,
                             "test",
                             opts,
                             global_step=global_step)
                LOGGER.info('===========================================')
                model_saver.save(model, global_step)
        if global_step >= opts.num_train_steps:
            break

    LOGGER.info('===========================================')
    if global_step % opts.valid_steps != 0:
        LOGGER.info('===========================================')
        LOGGER.info(f"Step {global_step}: start running validation")
        validate(model, val_dataloaders, "val", opts, global_step=global_step)
        if opts.test_query_txt_db:
            validate(model,
                     test_dataloaders,
                     "test",
                     opts,
                     global_step=global_step)
        LOGGER.info('===========================================')
    model_saver.save(model, f'{global_step}_final')
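The param-group indexing in the loop above (groups 0 and 1 get lr * lr_mul, groups 2 and 3 the base lr) relies on build_optimizer laying the groups out in that order, typically the newly initialized task head first and the pretrained encoder after, each split into decay/no-decay groups. A sketch of such a layout; the qa_head prefix is a hypothetical module name:

from torch.optim import AdamW

def build_optimizer_sketch(model, lr, weight_decay):
    """Groups 0/1: task head (scaled by lr_mul in the loop above);
    groups 2/3: pretrained encoder (base lr)."""
    no_decay = ('bias', 'LayerNorm.weight')
    head, encoder = [], []
    for n, p in model.named_parameters():
        (head if n.startswith('qa_head') else encoder).append((n, p))
    groups = []
    for named in (head, encoder):
        groups.append({'params': [p for n, p in named
                                  if not any(nd in n for nd in no_decay)],
                       'weight_decay': weight_decay})
        groups.append({'params': [p for n, p in named
                                  if any(nd in n for nd in no_decay)],
                       'weight_decay': 0.0})
    return AdamW(groups, lr=lr)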
Example #16
File: pretrain.py  Project: zmykevin/UC2
def create_dataloaders(datasets, is_train, opts, all_img_dbs=None):
    if all_img_dbs is None:
        all_img_dbs = ImageLmdbGroup(opts.conf_th, opts.max_bb, opts.min_bb,
                                     opts.num_bb, opts.compressed_db)
    dataloaders = {}
    for dset in datasets:
        if is_train:
            assert len(dset['db']) == len(dset['img'])
            assert len(dset['tasks']) == len(dset['mix_ratio'])
            img_db = [all_img_dbs[path] for path in dset['img']]
        else:
            assert len(dset['db']) == len(dset['img']) == 1
            img_db = all_img_dbs[dset['img'][0]]

        for i, t in enumerate(dset['tasks']):
            task = f'{t}_{dset["name"]}'

            if is_train:
                LOGGER.info(f"Loading {task} train dataset "
                            f"{dset['db']}, {[img.img_dir for img in img_db]}")
                txt_db = [TxtTokLmdb(path, opts.max_txt_len)
                          for path in dset['db']]
                language_list = []
                # only collect the language list from 'cc'
                if (dset['name'] == 'cc' and opts.multilingual_vmlm
                        and task.startswith('vmlm')):
                    for path in dset['db']:
                        # hacky: infer the language from the db path;
                        # needs a better mechanism
                        language = path.split('_')[-2]
                        language_list.append(language)

            else:
                LOGGER.info(f"Loading {task} validation dataset, "
                            f"{dset['db']}, {img_db.img_dir}")
                txt_db = TxtTokLmdb(dset['db'][0], -1)
                language_list = []
                if opts.multilingual_vmlm and task.startswith('vmlm'):
                    lan = dset["name"].split('_')[-1]
                    language_list.append(lan)
            
            if task.startswith('mlm'):
                blind = 'blind' in task
                dataset = build_mlm_dataset(txt_db, img_db,
                                            blind, is_train, opts)
            elif task.startswith('tlm'):
                blind = 'blind' in task
                text_only = "ni" in task
                dataset = build_tlm_dataset(txt_db, img_db,
                                            blind, is_train, opts, text_only)
            elif task.startswith('mmxlm'):
                soft = 'soft' in task
                dataset = build_mmxlm_dataset(txt_db, img_db, is_train,
                                              opts, soft)
            elif task.startswith('vmlm'):
                soft = 'soft' in task
                if soft:
                    # soft targets require the image soft-label db
                    assert dset.get('img_token_soft_label', None) is not None
                if is_train:
                    if soft:
                        assert (len(dset['db'])
                                == len(dset['img_token_soft_label']))
                        img_token_sl_db = [
                            Img_SoftLabel_Lmdb(path)
                            for path in dset['img_token_soft_label']
                        ]
                    else:
                        img_token_sl_db = None
                else:
                    if soft:
                        assert (len(dset['db'])
                                == len(dset['img_token_soft_label']) == 1)
                        img_token_sl_db = Img_SoftLabel_Lmdb(
                            dset['img_token_soft_label'][0])
                    else:
                        img_token_sl_db = None

                dataset = build_vmlm_dataset(txt_db, img_db, img_token_sl_db,
                                             is_train, opts, soft,
                                             language_list=language_list)
            elif task.startswith('mrfr'):
                only_i = 'only_i' in task
                dataset = build_mrfr_dataset(txt_db, img_db,
                                             only_i, is_train, opts)
            elif task.startswith('mrm-nce'):
                only_i = 'only_i' in task
                dataset = build_mrm_nce_dataset(txt_db, img_db,
                                                only_i, is_train, opts)
            elif task.startswith('mrc'):
                only_i = 'only_i' in task
                dataset = build_mrc_dataset(txt_db, img_db,
                                            only_i, is_train, opts)
            elif task.startswith('itm'):
                dataset = build_itm_dataset(txt_db, img_db, is_train, opts)
            else:
                raise ValueError(f'Undefined task {task}')

            LOGGER.info(f"{len(dataset[0])*hvd.size()} samples loaded")
            if task.startswith('itm'):
                # itm handles distributed training in dset not sampler
                loader = build_dataloader_itm(*dataset, is_train, opts)
            else:
                loader = build_dataloader(*dataset, is_train, opts)
            if is_train:
                ratio = dset['mix_ratio'][i]
                dataloaders[task] = (loader, ratio)
            else:
                dataloaders[task] = PrefetchLoader(loader)
    return dataloaders, all_img_dbs
Example #17
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = json.load(open(hps_file))
    if 'mlp' not in model_opts:
        model_opts['mlp'] = 1
    model_opts = Struct(model_opts)
    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_epoch_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    model = UniterForReferringExpressionComprehension.from_pretrained(
        f'{opts.output_dir}/log/model.json', checkpoint,
        img_dim=IMG_DIM, mlp=model_opts.mlp)
    model.to(device)
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    # load DBs and image dirs
    img_db_type = "gt" if "coco_gt" in opts.img_db else "det"
    conf_th = -1 if img_db_type == "gt" else model_opts.conf_th
    num_bb = 100 if img_db_type == "gt" else model_opts.num_bb
    eval_img_db = DetectFeatLmdb(opts.img_db,
                                 conf_th, model_opts.max_bb,
                                 model_opts.min_bb, num_bb,
                                 opts.compressed_db)

    # Prepro txt_dbs
    txt_dbs = opts.txt_db.split(':')
    for txt_db in txt_dbs:
        print(f'Evaluating {txt_db}')
        eval_txt_db = ReTxtTokLmdb(txt_db, -1)
        eval_dataset = ReEvalDataset(
            eval_txt_db, eval_img_db, use_gt_feat=img_db_type == "gt")

        sampler = DistributedSampler(eval_dataset, num_replicas=n_gpu,
                                     rank=rank, shuffle=False)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=sampler,
                                     batch_size=opts.batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=re_eval_collate)
        eval_dataloader = PrefetchLoader(eval_dataloader)

        # evaluate
        val_log, results = evaluate(model, eval_dataloader)

        result_dir = f'{opts.output_dir}/results_test'
        if not exists(result_dir) and rank == 0:
            os.makedirs(result_dir)
        write_to_tmp(
            f"{txt_db.split('_')[1].split('.')[0]}-acc({img_db_type}): "
            f"{results['acc']*100:.2f}% ",
            opts.tmp_file)  # fixed: `args` is not defined in this scope

        all_results = list(concat(all_gather_list(results)))

        if hvd.rank() == 0:
            db_split = txt_db.split('/')[-1].split('.')[0]  # refcoco+_val
            img_dir = opts.img_db.split('/')[-1]  # re_coco_gt
            with open(f'{result_dir}/results_{opts.checkpoint}_'
                      f'{db_split}_on_{img_dir}_all.json', 'w') as f:
                json.dump(all_results, f)
        # print
        print(f'{opts.output_dir}/results_test')

    write_to_tmp('\n', opts.tmp_file)
Example #18
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    opts.n_gpu = n_gpu
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True

    set_random_seed(opts.seed)

    # data loaders
    train_dataloaders = {}
    val_dataloaders = {}
    for target, t_r in zip(opts.targets, opts.targets_ratio):
        train_loaders, val_loaders = build_target_loaders(
            target, t_r,
            opts)  # -> choose which task and get corrsponding task dataloder
        train_dataloaders.update(train_loaders)
        val_dataloaders.update(val_loaders)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    if opts.load_partial_pretrained:
        # from roberta
        model = HeroForPretraining(VideoModelConfig(opts.model_config),
                                   vfeat_dim=VFEAT_DIM,
                                   max_frm_seq_len=max_frm_seq_len,
                                   lw_neg_ctx=opts.lw_neg_ctx,
                                   lw_neg_q=opts.lw_neg_q,
                                   lw_st_ed=0,
                                   ranking_loss_type=opts.ranking_loss_type,
                                   use_hard_negative=False,
                                   hard_pool_size=opts.hard_pool_size,
                                   margin=opts.margin,
                                   use_all_neg=opts.use_all_neg,
                                   drop_svmr_prob=opts.drop_svmr_prob)
        model.load_partial_pretrained(checkpoint,
                                      VFEAT_DIM,
                                      max_frm_seq_len,
                                      skip_layers=opts.skip_layer_loading)
    else:
        # continue training
        model = HeroForPretraining.from_pretrained(
            opts.model_config,
            state_dict=checkpoint,
            vfeat_dim=VFEAT_DIM,
            max_frm_seq_len=max_frm_seq_len,
            lw_neg_ctx=opts.lw_neg_ctx,
            lw_neg_q=opts.lw_neg_q,
            lw_st_ed=0,
            ranking_loss_type=opts.ranking_loss_type,
            use_hard_negative=False,
            hard_pool_size=opts.hard_pool_size,
            margin=opts.margin,
            use_all_neg=opts.use_all_neg,
            drop_svmr_prob=opts.drop_svmr_prob)

    model.to(device)
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16,
                                      opt_level='O2')
    restorer = TrainingRestorer(opts, model, optimizer)
    all_gather_list(None)  # sync to prevent slower rank to read training meta
    global_step = restorer.global_step
    TB_LOGGER.global_step = global_step
    if hvd.rank() == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        pbar = NoOp()
        model_saver = NoOp()
        restorer = NoOp()

    if global_step > 0:
        pbar.update(global_step)
    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    task2loss = {
        task: RunningMeter(f'loss/{task}')
        for task in train_dataloaders.keys()
    }
    for task in train_dataloaders.keys():
        if task.startswith('vsm'):
            for obj in ('st_ed', 'neg_ctx', 'neg_q'):
                task2loss[f"{task}_{obj}"] = RunningMeter(f'loss/{task}_{obj}')
    model.train()
    n_examples = defaultdict(int)
    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    if global_step == 0:
        optimizer.step()
    assert all(global_step == s for s in all_gather_list(global_step))
    for step, (task, batch) in enumerate(meta_loader):
        LOGGER.debug(f"Task: {task}")

        # hard negative in VSM
        if len(opts.hard_negtiave_start_step) > 0:
            for i, hn_step in enumerate(opts.hard_negtiave_start_step):
                if global_step >= hn_step and hn_step != -1:
                    model.set_hard_negative(True, opts.hard_pool_size[i],
                                            opts.hard_neg_weights[i])

        # start-end loss
        if opts.train_span_start_step != -1 and\
                global_step >= opts.train_span_start_step:
            model.set_train_st_ed(opts.lw_st_ed)

        train_task = task.split('_')[0]
        n_examples[task] += opts.train_batch_size

        loss = model(batch, task=train_task, compute_loss=True)
        if train_task == 'vsm':
            loss_st_ed, loss_neg_ctx, loss_neg_q = loss
            loss = loss_st_ed + loss_neg_ctx + loss_neg_q
            for n, ls, w in (('st_ed', loss_st_ed, opts.lw_st_ed),
                             ('neg_ctx', loss_neg_ctx, opts.lw_neg_ctx),
                             ('neg_q', loss_neg_q, opts.lw_neg_q)):
                ls = ls.item()
                if w:
                    ls /= w
                task2loss[f'{task}_{n}'](ls)
        elif train_task == "mffr":
            loss = torch.sqrt(loss.sum(dim=1))

        loss = loss.mean()
        task2loss[task](loss.item())

        delay_unscale = (step + 1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss,
                            optimizer,
                            delay_unscale=delay_unscale,
                            loss_id=task2scaler[task]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from every processes
                # do this before unscaling to make sure every process uses
                # the same gradient scale
                grads = [
                    p.grad.data for p in model.parameters()
                    if p.requires_grad and p.grad is not None
                ]
                LOGGER.debug("before reduce grad")
                all_reduce_and_rescale_tensors(grads, float(1))
                LOGGER.debug("after reduce grad")

        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1

            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            # log loss
            # NOTE: only consider rank 0 for speed
            TB_LOGGER.log_scaler_dict({
                ll.name: ll.val
                for ll in task2loss.values() if ll.val is not None
            })
            TB_LOGGER.step()

            LOGGER.debug("before norm grad")
            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            LOGGER.debug("after norm grad")
            LOGGER.debug("before optim step")
            optimizer.step()
            optimizer.zero_grad()
            pbar.update(1)
            LOGGER.debug("after optim step")

            if global_step % 100 == 0:
                LOGGER.debug("after gather stats")
                # monitor training throughput
                LOGGER.info('-------------------------------------------')
                LOGGER.info(f'Step {global_step}:')
                for t in train_dataloaders.keys():
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time() - start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)
                LOGGER.debug("after gather stats")

            if global_step % opts.valid_steps == 0:
                LOGGER.info('===========================================')
                LOGGER.info(f"Step {global_step}: start running validation")
                validate(model, val_dataloaders, opts)
                LOGGER.info('===========================================')
                model_saver.save(model, global_step)

            # step restorer in the end to prevent missing validation checkpoint
            restorer.step()
        if global_step >= opts.num_train_steps:
            break

    LOGGER.info('===========================================')
    if global_step % opts.valid_steps != 0:
        LOGGER.info('===========================================')
        LOGGER.info(f"Step {global_step}: start running validation")
        validate(model, val_dataloaders, opts)
        LOGGER.info('===========================================')
        model_saver.save(model, global_step)
Example #19
File: pretrain.py  Project: zmykevin/UC2
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    opts.rank = rank
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))
    
    if opts.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, "
                         "should be >= 1".format(
                            opts.gradient_accumulation_steps))

    set_random_seed(opts.seed)

    if rank == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        LOGGER.disabled = True
        pbar = NoOp()
        model_saver = NoOp()

    all_dbs = [db for datasets in [opts.train_datasets, opts.val_datasets]
               for dset in datasets for db in dset['db']]

    tokenizer = json.load(open(f'{all_dbs[0]}/meta.json'))['bert']
    # assert all(tokenizer == json.load(open(f'{db}/meta.json'))['bert']
    #            for db in all_dbs)

    # build data loaders
    train_dataloaders, all_img_dbs = create_dataloaders(
        opts.train_datasets, True, opts)
    val_dataloaders, _ = create_dataloaders(
        opts.val_datasets, False, opts, all_img_dbs)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    if opts.rename_checkpoints:
        rename_checkpoint(checkpoint)
    # include early-adaptation weights if provided
    if opts.early_adaptation:
        ea_ckpt = torch.load(opts.early_adaptation_checkpoint)
        checkpoint['roberta.img_embeddings.img_linear.weight'] = \
            ea_ckpt['v2w_linear.weight']
        checkpoint['roberta.img_embeddings.img_linear.bias'] = \
            ea_ckpt['v2w_linear.bias']
    
    model = VLXLMRForPretraining.from_pretrained(
        opts.model_config, checkpoint,
        img_dim=IMG_DIM, img_label_dim=IMG_LABEL_DIM,
        nce_temp=opts.nce_temp, ot_pos_only=opts.ot_pos_only)

    # model = UniterForPretraining.from_pretrained(
    #     opts.model_config, checkpoint,
    #     img_dim=IMG_DIM, img_label_dim=IMG_LABEL_DIM,
    #     nce_temp=opts.nce_temp, ot_pos_only=opts.ot_pos_only)

    model.pad_vocab()  # tensor core padding for vocabulary
    model.to(device)
    model.train()
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model, optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16, opt_level='O2')

    # initialize the TrainingRestorer (restores global_step after a restart)
    restorer = TrainingRestorer(opts, model, optimizer)
    global_step = restorer.global_step
    TB_LOGGER._global_step = global_step
    if hvd.rank() != 0:
        restorer = NoOp()  # only rank 0 restores/saves checkpoints

    if global_step > 0:
        pbar.update(global_step)

    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)
    
    # to compute training statistics
    task2loss = {task: RunningMeter(f'loss/{task}')
                 for task in train_dataloaders.keys()}
    # ITM w/ OT
    if opts.itm_ot_lambda > 0:
        for task in train_dataloaders.keys():
            if task.startswith('itm'):
                task2loss[f'{task}_xe'] = RunningMeter(f'loss/{task}_xe')
                task2loss[f'{task}_ot'] = RunningMeter(f'loss/{task}_ot')
                if not opts.ot_pos_only:
                    task2loss[f'{task}_ot_pos'] = RunningMeter(
                        f'loss/{task}_ot_pos')
                    task2loss[f'{task}_ot_neg'] = RunningMeter(
                        f'loss/{task}_ot_neg')
    
    n_examples = defaultdict(int)
    n_in_units = defaultdict(int)
    n_loss_units = defaultdict(int)
    n_neg_nce = defaultdict(int)
    grad_norm = 0

    start = time()

    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    optimizer.step()
    for step, (name, batch) in enumerate(meta_loader):

        # forward pass
        assert all(name == n for n in all_gather_list(name))
        n_examples[name] += batch['input_ids'].size(0)
        n_in_units[name] += (batch['attn_masks'] == 1).sum().item()
        if 'nce' in name:
            n_neg_nce[name] += batch['neg_feats'].size(0)
        task = name.split('_')[0]
        loss = model(batch, task=task, compute_loss=True)
        if task.startswith('itm'):
            # OT
            itm_loss, ot_loss = loss
            n_loss_units[name] += itm_loss.size(0)
            itm_loss = itm_loss.mean()
            if ot_loss is not None:
                if not opts.ot_pos_only:
                    ot_pos, ot_neg = ot_loss
                    ot_loss = (ot_pos.sum() - ot_neg.sum()
                               ) / (ot_pos.size(0) + ot_neg.size(0))

                    # NOTE: beware of empty tensor
                    ot_pos = ot_pos.mean().item()
                    if not math.isnan(ot_pos):
                        task2loss[f'{name}_ot_pos'](ot_pos)
                    ot_neg = ot_neg.mean().item()
                    if not math.isnan(ot_neg):
                        task2loss[f'{name}_ot_neg'](ot_neg)
                else:
                    ot_loss = ot_loss.mean()
                loss = itm_loss + opts.itm_ot_lambda * ot_loss
                task2loss[f'{name}_xe'](itm_loss.item())
                task2loss[f'{name}_ot'](ot_loss.item())
            else:
                loss = itm_loss
        elif task.startswith('vmlm-soft'):
            loss = 1000 * loss.mean()
        else:
            n_loss_units[name] += loss.size(0)
            loss = loss.mean()  # loss is not normalized in model

        # backward pass
        delay_unscale = (step+1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss, optimizer, delay_unscale=delay_unscale,
                            loss_id=task2scaler[name]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from every processes
                # do this before unscaling to make sure every process uses
                # the same gradient scale
                grads = [p.grad.data for p in model.parameters()
                         if p.requires_grad and p.grad is not None]
                all_reduce_and_rescale_tensors(grads, float(1))
        task2loss[name](loss.item())

        # optimizer update and logging
        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1

            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            # log loss
            # NOTE: not gathered across GPUs for efficiency
            TB_LOGGER.log_scaler_dict({l.name: l.val
                                       for l in task2loss.values()
                                       if l.val is not None})
            TB_LOGGER.step()

            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            optimizer.step()
            optimizer.zero_grad()
            pbar.update(1)

            if global_step % 100 == 0:
                # monitor training throughput
                LOGGER.info(f'==============Step {global_step}===============')
                for t in train_dataloaders.keys():
                    assert all(tt == t for tt in all_gather_list(t))
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time()-start))
                    tot_in = sum(all_gather_list(n_in_units[t]))
                    in_per_sec = int(tot_in / (time()-start))
                    tot_l = sum(all_gather_list(n_loss_units[t]))
                    l_per_sec = int(tot_l / (time()-start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)
                    TB_LOGGER.add_scalar(f'perf/{t}_in_per_s', in_per_sec,
                                         global_step)
                    TB_LOGGER.add_scalar(f'perf/{t}_loss_per_s', l_per_sec,
                                         global_step)
                    if 'nce' in t:
                        avg_neg = sum(all_gather_list(n_neg_nce[t])
                                      ) / hvd.size() // step
                        LOGGER.info(f'{t}: averaging '
                                    f'{avg_neg} negative samples')
                LOGGER.info('===============================================')

            if global_step % opts.valid_steps == 0:
                LOGGER.info(f'Step {global_step}: start validation')
                validate(model, val_dataloaders)
                model_saver.save(model, global_step, optimizer)
            restorer.step()
        if global_step >= opts.num_train_steps:
            break

    if global_step % opts.valid_steps != 0:
        LOGGER.info(f'Step {global_step}: start validation')
        validate(model, val_dataloaders)
        model_saver.save(model, global_step)
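A note on the backward pass used throughout these examples: unscaling is delayed until the last accumulation micro-step, and the raw gradients are then all-reduced so every Horovod rank applies an identical update. A hedged, unbucketed sketch of what `all_reduce_and_rescale_tensors` could do (the real helper buckets tensors into large buffers to amortize communication, and whether gradients are averaged or summed is a repo convention; this sketch averages):

import horovod.torch as hvd


def all_reduce_and_rescale_sketch(tensors, rescale_denom):
    # hypothetical flat variant: average each gradient across ranks
    # in place, then rescale by a caller-provided denominator
    for t in tensors:
        hvd.allreduce_(t, op=hvd.Average)
        if rescale_denom != 1:
            t.div_(rescale_denom)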
Example #20
def build_downstream_dataloaders(tasks,
                                 video_db,
                                 video_ids,
                                 is_train,
                                 opts,
                                 q_txt_db=None,
                                 shuffle=False):
    dataloaders = {}
    assert q_txt_db is not None
    for i, task in enumerate(tasks):
        if is_train:
            LOGGER.info(f"Loading {task} train dataset "
                        f"{video_db.img_db.img_dir}")
            batch_size = opts.train_batch_size
        else:
            batch_size = opts.val_batch_size
            LOGGER.info(f"Loading {task} validation dataset"
                        f"{video_db.img_db.img_dir}")
        if task in ["tvqa", "how2qa"]:
            if is_train:
                dataset = VideoQaDataset(video_ids, video_db, q_txt_db)
                collate_fn = video_qa_collate
            else:
                dataset = VideoQaEvalDataset(video_ids, video_db, q_txt_db)
                collate_fn = video_qa_eval_collate
        elif task in ["tvr", "how2r", "didemo_video_sub"]:
            if is_train:
                dataset = VcmrDataset(video_ids, video_db, q_txt_db)
                collate_fn = vcmr_collate
            else:
                dataset = VcmrEvalDataset(video_ids, video_db, q_txt_db)
                collate_fn = vcmr_eval_collate
        elif task == "didemo_video_only":
            if is_train:
                dataset = VcmrVideoOnlyDataset(video_ids, video_db, q_txt_db)
                collate_fn = vcmr_collate
            else:
                dataset = VcmrVideoOnlyEvalDataset(video_ids, video_db,
                                                   q_txt_db)
                collate_fn = vcmr_eval_collate
        elif task == "msrvtt_video_only":
            if is_train:
                dataset = VrVideoOnlyDataset(video_ids, video_db, q_txt_db)
                collate_fn = vr_collate
            else:
                dataset = VrVideoOnlyEvalDataset(video_ids, video_db, q_txt_db)
                collate_fn = vr_eval_collate
        elif task == "msrvtt_video_sub":
            if is_train:
                dataset = VrDataset(video_ids, video_db, q_txt_db)
                collate_fn = vr_collate
            else:
                dataset = VrEvalDataset(video_ids, video_db, q_txt_db)
                collate_fn = vr_eval_collate
        elif task == "violin":
            if is_train:
                dataset = ViolinDataset(video_ids, video_db, q_txt_db)
                collate_fn = violin_collate
            else:
                dataset = ViolinEvalDataset(video_ids, video_db, q_txt_db)
                collate_fn = violin_eval_collate
        else:
            raise ValueError(f'Undefined task {task}')
        LOGGER.info(f"{sum(all_gather_list(len(dataset)))} samples loaded")
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=opts.n_workers,
                            pin_memory=opts.pin_mem,
                            collate_fn=collate_fn,
                            shuffle=shuffle)
        if is_train:
            ratio = 1
            dataloaders[task] = (loader, ratio)
        else:
            dataloaders[task] = PrefetchLoader(loader)
    return dataloaders
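Every loader built here ends up wrapped in `PrefetchLoader`, which hides host-to-device copy latency by staging the next batch on a side CUDA stream while the current one is consumed. A simplified sketch of the idea (assumes dict batches of CPU tensors; pinned memory is what makes the `non_blocking=True` copies truly asynchronous):

import torch


class PrefetchLoaderSketch:
    """Hypothetical simplified prefetcher over a DataLoader."""

    def __init__(self, loader):
        self.loader = loader
        self.stream = torch.cuda.Stream()

    def __iter__(self):
        ready = None
        for batch in self.loader:
            # stage the incoming batch's H2D copies on the side stream
            with torch.cuda.stream(self.stream):
                batch = {k: v.cuda(non_blocking=True)
                         if torch.is_tensor(v) else v
                         for k, v in batch.items()}
            if ready is not None:
                yield ready  # compute on batch i overlaps copy of batch i+1
            # default stream must wait for the async copies to finish
            torch.cuda.current_stream().wait_stream(self.stream)
            ready = batch
        if ready is not None:
            yield ready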
Example #21
File: eval_vcmr.py  Project: zhixinma/HERO
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, 16-bits training: {}".format(device, n_gpu, hvd.rank(), opts.fp16))
    if hvd.rank() != 0:
        LOGGER.disabled = True
    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(load_json(hps_file))
    model_config = f'{opts.output_dir}/log/model_config.json'

    # load DBs and image dirs
    video_ids = get_video_ids(opts.query_txt_db)
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db, model_opts.vfeat_interval, model_opts)
    else:
        txt_meta = load_json(os.path.join(opts.query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta, model_opts.vfeat_interval, model_opts)
    assert opts.split in opts.query_txt_db
    q_txt_db = QueryTokLmdb(opts.query_txt_db, -1)
    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset

    eval_dataset = inf_dataset(video_ids, video_db, q_txt_db,
                               distributed=model_opts.distributed_eval)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    img_pos_embed_weight_key = ("v_encoder.f_encoder.img_embeddings.position_embeddings.weight")
    assert img_pos_embed_weight_key in checkpoint
    max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])

    model = HeroForVcmr.from_pretrained(
        model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=model_opts.lw_neg_ctx,
        lw_neg_q=model_opts.lw_neg_q, lw_st_ed=0,
        ranking_loss_type=model_opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=model_opts.hard_pool_size,
        margin=model_opts.margin,
        use_all_neg=model_opts.use_all_neg,
        drop_svmr_prob=model_opts.drop_svmr_prob)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset, batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=vcmr_full_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = validate_full_vcmr(model, eval_dataloader, opts.split,
                                    opts, model_opts)
    result_dir = f'{opts.output_dir}/results_{opts.split}'

    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results_list = all_gather_list(results)

    if hvd.rank() == 0:  # save for only one time
        all_results = {"video2idx": all_results_list[0]["video2idx"]}
        for rank_id in range(hvd.size()):
            for key, val in all_results_list[rank_id].items():
                if key == "video2idx":
                    continue
                if key not in all_results:
                    all_results[key] = []
                all_results[key].extend(all_results_list[rank_id][key])
        LOGGER.info('All results joined......')

        # save_vr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vr.json')
        # save_vcmr_base_on_vr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vcmr_base_on_vr.json')
        save_vcmr(all_results, f'{result_dir}/results_{opts.checkpoint}_{opts.split}_vcmr.json')
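The result merging above leans on `all_gather_list`, which gathers arbitrary picklable Python objects across Horovod ranks. One hedged way to implement it (the real helper reuses a fixed buffer; the pad-to-max scheme below is an assumption):

import pickle
import torch
import horovod.torch as hvd


def all_gather_list_sketch(obj):
    """Hypothetical: return [obj_rank0, obj_rank1, ...] on every rank."""
    data = pickle.dumps(obj)
    local_len = torch.tensor([len(data)])
    sizes = hvd.allgather(local_len)            # (world_size,)
    buf = torch.zeros(int(sizes.max()), dtype=torch.uint8)
    buf[:len(data)] = torch.tensor(list(data), dtype=torch.uint8)
    gathered = hvd.allgather(buf.unsqueeze(0))  # (world_size, max_len)
    return [pickle.loads(bytes(gathered[i, :int(sizes[i])].tolist()))
            for i in range(gathered.size(0))]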
Example #22
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    opts.n_gpu = n_gpu
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if hvd.rank() != 0:
        LOGGER.disabled = True
    set_random_seed(opts.seed)

    # train_examples = None
    LOGGER.info(f"Loading the whole video dataset {opts.sub_txt_db}, "
                f"{opts.vfeat_db}")
    if opts.task != "didemo_video_only":
        video_db = load_video_sub_dataset(opts.vfeat_db, opts.sub_txt_db,
                                          opts.vfeat_interval, opts)
    else:
        txt_meta = load_json(join(opts.train_query_txt_db, "meta.json"))
        video_db = load_video_only_dataset(opts.vfeat_db, txt_meta,
                                           opts.vfeat_interval, opts)

    # data loaders
    # train
    video_ids = get_video_ids(opts.train_query_txt_db)
    train_q_txt_db = QueryTokLmdb(opts.train_query_txt_db, opts.max_txt_len)
    train_dataloaders = build_downstream_dataloaders([opts.task],
                                                     video_db,
                                                     video_ids,
                                                     True,
                                                     opts,
                                                     shuffle=True,
                                                     q_txt_db=train_q_txt_db)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # val
    video_ids = get_video_ids(opts.val_query_txt_db)
    val_q_txt_db = QueryTokLmdb(opts.val_query_txt_db, -1)
    val_dataloaders = build_downstream_dataloaders([opts.task],
                                                   video_db,
                                                   video_ids,
                                                   False,
                                                   opts,
                                                   q_txt_db=val_q_txt_db)

    if opts.task != "didemo_video_only":
        inf_dataset = VcmrFullEvalDataset
    else:
        inf_dataset = VcmrVideoOnlyFullEvalDataset
    LOGGER.info(f"Loading Inference Dataset {opts.val_query_txt_db} (val)")
    val_dset = inf_dataset(video_ids,
                           video_db,
                           val_q_txt_db,
                           distributed=opts.distributed_eval)
    inf_loader_val = DataLoader(val_dset,
                                batch_size=opts.vcmr_eval_q_batch_size,
                                num_workers=opts.n_workers,
                                pin_memory=opts.pin_mem,
                                collate_fn=vcmr_full_eval_collate)
    inf_loader_val = PrefetchLoader(inf_loader_val)
    if opts.test_query_txt_db:
        LOGGER.info(
            f"Loading Inference Dataset {opts.test_query_txt_db} (test)")
        video_ids = get_video_ids(opts.test_query_txt_db)
        test_q_txt_db = QueryTokLmdb(opts.test_query_txt_db, -1)
        test_dset = inf_dataset(video_ids,
                                video_db,
                                test_q_txt_db,
                                distributed=opts.distributed_eval)
        inf_loader_test = DataLoader(test_dset,
                                     batch_size=opts.vcmr_eval_q_batch_size,
                                     num_workers=opts.n_workers,
                                     pin_memory=opts.pin_mem,
                                     collate_fn=vcmr_full_eval_collate)
        inf_loader_test = PrefetchLoader(inf_loader_test)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    img_pos_embed_weight_key = "v_encoder.f_encoder.img_embeddings" +\
        ".position_embeddings.weight"
    if img_pos_embed_weight_key in checkpoint:
        max_frm_seq_len = len(checkpoint[img_pos_embed_weight_key])
    else:
        max_frm_seq_len = MAX_FRM_SEQ_LEN

    model = HeroForVcmr.from_pretrained(
        opts.model_config,
        state_dict=checkpoint,
        vfeat_dim=VFEAT_DIM,
        max_frm_seq_len=max_frm_seq_len,
        lw_neg_ctx=opts.lw_neg_ctx,
        lw_neg_q=opts.lw_neg_q,
        lw_st_ed=0,
        ranking_loss_type=opts.ranking_loss_type,
        use_hard_negative=False,
        hard_pool_size=opts.hard_pool_size,
        margin=opts.margin,
        use_all_neg=opts.use_all_neg,
        drop_svmr_prob=opts.drop_svmr_prob)

    model.to(device)
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16,
                                      opt_level='O2')
    restorer = TrainingRestorer(opts, model, optimizer)
    global_step = restorer.global_step
    TB_LOGGER.global_step = global_step
    if hvd.rank() == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        if not exists(join(opts.output_dir, 'results')):
            # store tvr predictions
            os.makedirs(join(opts.output_dir, 'results'))
        if opts.nms_thd != -1:
            # store tvr-nms predictions
            if not exists(join(opts.output_dir, 'results_nms')):
                os.makedirs(join(opts.output_dir, 'results_nms'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        pbar = NoOp()
        model_saver = NoOp()
        restorer = NoOp()

    if global_step > 0:
        pbar.update(global_step)
    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    task2loss = {
        task: RunningMeter(f'loss/{task}')
        for task in train_dataloaders.keys()
    }

    for obj in (f'{opts.task}_st_ed', f'{opts.task}_neg_ctx',
                f'{opts.task}_neg_q'):
        task2loss[obj] = RunningMeter(f'loss/{obj}')
    model.train()
    n_examples = defaultdict(int)
    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    if global_step == 0:
        optimizer.step()
    for step, (task, batch) in enumerate(meta_loader):
        if len(opts.hard_negtiave_start_step) > 0:
            for i, hn_step in enumerate(opts.hard_negtiave_start_step):
                if global_step >= hn_step and hn_step != -1:
                    model.set_hard_negative(True, opts.hard_pool_size[i],
                                            opts.hard_neg_weights[i])
        if opts.train_span_start_step != -1 and\
                global_step >= opts.train_span_start_step:
            model.set_train_st_ed(opts.lw_st_ed)

        n_examples[task] += opts.train_batch_size

        loss = model(batch, task=task, compute_loss=True)

        loss_st_ed, loss_neg_ctx, loss_neg_q = loss
        loss = loss_st_ed + loss_neg_ctx + loss_neg_q
        for n, ls, w in (('st_ed', loss_st_ed, opts.lw_st_ed),
                         ('neg_ctx', loss_neg_ctx, opts.lw_neg_ctx),
                         ('neg_q', loss_neg_q, opts.lw_neg_q)):
            ls = ls.item()
            if w:
                ls /= w
            task2loss[f'{task}_{n}'](ls)

        loss = loss.mean()
        task2loss[task](loss.item())

        delay_unscale = (step + 1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss,
                            optimizer,
                            delay_unscale=delay_unscale,
                            loss_id=task2scaler[task]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from every processes
                # do this before unscaling to make sure every process uses
                # the same gradient scale
                grads = [
                    p.grad.data for p in model.parameters()
                    if p.requires_grad and p.grad is not None
                ]
                all_reduce_and_rescale_tensors(grads, float(1))

        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1

            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            # log loss
            TB_LOGGER.log_scaler_dict({
                temp_loss.name: temp_loss.val
                for temp_loss in task2loss.values()
                if temp_loss.val is not None
            })
            TB_LOGGER.step()

            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            optimizer.step()
            optimizer.zero_grad()
            pbar.update(1)

            if global_step % 100 == 0:
                # monitor training throughput
                LOGGER.info('-------------------------------------------')
                LOGGER.info(f'Step {global_step}:')
                for t in train_dataloaders.keys():
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time() - start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)

            if global_step % opts.valid_steps == 0:
                LOGGER.info('===========================================')
                LOGGER.info(f"Step {global_step}: start running validation")
                validate(model, val_dataloaders, opts)
                if hvd.rank() == 0 or opts.distributed_eval:
                    log, results = validate_full_vcmr(model,
                                                      inf_loader_val,
                                                      'val',
                                                      opts,
                                                      model_opts=opts)
                    save_json(
                        results, f'{opts.output_dir}/results/'
                        f'val_results_{global_step}_rank{hvd.rank()}.json')
                    TB_LOGGER.log_scaler_dict(log)
                    if opts.test_query_txt_db:
                        log, results = validate_full_vcmr(model,
                                                          inf_loader_test,
                                                          'test',
                                                          opts,
                                                          model_opts=opts)
                        save_json(
                            results, f'{opts.output_dir}/results/'
                            f'test_results_{global_step}_rank{hvd.rank()}.json'
                        )
                        TB_LOGGER.log_scaler_dict(log)
                LOGGER.info('===========================================')
                model_saver.save(model, global_step)

            # step restorer in the end to prevent missing validation checkpoint
            restorer.step()
        if global_step >= opts.num_train_steps:
            break

    LOGGER.info('===========================================')
    if global_step % opts.valid_steps != 0:
        if hvd.rank() == 0 or opts.distributed_eval:
            log, results = validate_full_vcmr(model,
                                              inf_loader_val,
                                              'val',
                                              opts,
                                              model_opts=opts)
            save_json(
                results, f'{opts.output_dir}/results/'
                f'val_results_{global_step}'
                f'_rank{hvd.rank()}_final.json')
            TB_LOGGER.log_scaler_dict(log)
            if opts.test_query_txt_db:
                log, results = validate_full_vcmr(model,
                                                  inf_loader_test,
                                                  'test',
                                                  opts,
                                                  model_opts=opts)
                save_json(
                    results, f'{opts.output_dir}/results/'
                    f'test_results_{global_step}_rank{hvd.rank()}.json')
                TB_LOGGER.log_scaler_dict(log)
    model_saver.save(model, f'{global_step}_final')
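`MetaLoader` is the piece that turns the per-task `(loader, ratio)` dict into the single `(task, batch)` stream iterated above. A minimal sketch, assuming ratios act as sampling weights and that a shared RNG seed keeps every rank on the same task (which the `assert all(name == n for n in all_gather_list(name))` in the pretraining loop verifies):

import random


def _infinite(loader):
    # re-iterate the DataLoader forever (fresh shuffle each pass)
    while True:
        for batch in loader:
            yield batch


class MetaLoaderSketch:
    """Hypothetical task mixer yielding (task_name, batch) pairs."""

    def __init__(self, loaders, accum_steps=1, seed=0):
        self.names, self.iters, self.weights = [], {}, []
        for name, l in loaders.items():
            loader, ratio = l if isinstance(l, tuple) else (l, 1)
            self.names.append(name)
            self.iters[name] = _infinite(loader)
            self.weights.append(ratio)
        self.accum_steps = accum_steps
        self.rng = random.Random(seed)

    def __iter__(self):
        while True:
            name = self.rng.choices(self.names, self.weights)[0]
            # keep one task across all gradient-accumulation micro-steps
            for _ in range(self.accum_steps):
                yield name, next(self.iters[name])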
Example #23
def create_dataloaders(datasets, is_train, opts, all_img_dbs=None):
    if all_img_dbs is None:
        all_img_dbs = ImageLmdbGroup(opts.conf_th, opts.max_bb, opts.min_bb,
                                     opts.num_bb, opts.compressed_db)
    dataloaders = {}

    for dset in datasets:
        for vcr_task in ["qa", "qar"]:
            if is_train:
                assert len(dset['db']) == len(dset['img'])
                assert len(dset['tasks']) == len(dset['mix_ratio'])
                img_db, img_db_gt = [], []
                for img_path in dset['img']:
                    curr_img_db, curr_img_db_gt = load_img_feat(
                        img_path, all_img_dbs, opts)
                    img_db.append(curr_img_db)
                    img_db_gt.append(curr_img_db_gt)
            else:
                assert len(dset['db']) == len(dset['img']) == 1
                img_db, img_db_gt = load_img_feat(dset['img'][0], all_img_dbs,
                                                  opts)

            for i, t in enumerate(dset['tasks']):
                task = f'{t}_{dset["name"]}'

                if is_train:
                    LOGGER.info(
                        f"Loading {task} train dataset with vcr_{vcr_task}, "
                        f"{dset['db']}, {[img.img_dir for img in img_db]},"
                        f"{[img.img_dir for img in img_db_gt]}")
                    txt_db = [
                        VcrTxtTokLmdb(path, opts.max_txt_len, task=vcr_task)
                        for path in dset['db']
                    ]
                else:
                    LOGGER.info(
                        f"Loading {task} val dataset with vcr_{vcr_task}, "
                        f"{dset['db']}, {img_db.img_dir},"
                        f"{img_db_gt.img_dir}")
                    txt_db = VcrTxtTokLmdb(dset['db'][0], -1, task=vcr_task)

                if task.startswith('mlm'):
                    dataset = build_mlm_dataset(txt_db, img_db_gt, img_db,
                                                is_train, opts)
                elif task.startswith('mrfr'):
                    dataset = build_mrfr_dataset(txt_db, img_db_gt, img_db,
                                                 is_train, opts)
                elif task.startswith('mrc'):
                    dataset = build_mrc_dataset(txt_db, img_db_gt, img_db,
                                                is_train, opts)
                else:
                    raise ValueError(f'Undefined task {task}')

                LOGGER.info(f"{len(dataset[0])*hvd.size()} samples loaded")
                loader = build_dataloader(*dataset, is_train, opts)
                if is_train:
                    ratio = dset['mix_ratio'][i]
                    dataloaders[task] = (loader, ratio)
                else:
                    dataloaders[task] = PrefetchLoader(loader)
    return dataloaders, all_img_dbs
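The `task2loss` values used throughout are `RunningMeter`s: callables that absorb raw loss values and expose a smoothed `.val` for TensorBoard. A minimal exponential-moving-average version (the smoothing constant is an assumption):

class RunningMeterSketch:
    """Hypothetical running meter: call with new values, read .val."""

    def __init__(self, name, smooth=0.99):
        self.name = name
        self._smooth = smooth
        self._val = None

    def __call__(self, value):
        # EMA update; the first call just seeds the value
        if self._val is None:
            self._val = value
        else:
            self._val = self._smooth * self._val + (1 - self._smooth) * value

    @property
    def val(self):
        return self._val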
Example #24
def build_target_loaders(target, tgt_ratio, opts):
    if 'vfeat_shards' in target:
        sub_txt_db = SubTokLmdb(f"{opts.txt_db}/{target['sub_txt_db']}",
                                opts.max_clip_len)
        video_db = [
            load_video_sub_dataset(f"{target['vfeat_db']}/{shard}", sub_txt_db,
                                   target['vfeat_interval'], opts)
            for shard in target['vfeat_shards']
        ]
    else:
        # video_db -> data/data.py No.392
        video_db = load_video_sub_dataset(
            f"{opts.img_db}/{target['vfeat_db']}",  # /video/tv/
            f"{opts.txt_db}/{target['sub_txt_db']}",  # /txt/tv_subtitles.db
            target['vfeat_interval'],
            opts)  # 1.5, opts
    train_loaders = {}
    val_loaders = {}
    for split in target['splits']:
        if 'ratio' not in split:
            split['ratio'] = [1] * len(split['tasks'])
        assert len(split['tasks']) == len(split['ratio'])
        for task, r in zip(split['tasks'], split['ratio']):
            name = f"{task}_{target['name']}_{split['name']}"
            LOGGER.info(f'loading {name} ...')
            ratio = tgt_ratio * r
            if isinstance(video_db, list):
                all_train_ids = [
                    json.load(open(f"{opts.txt_db}/{ids}"))
                    for ids in split['train_idx']
                ]
            else:
                train_ids = json.load(
                    open(f"{opts.txt_db}/{split['train_idx']}"))
            val_ids = json.load(open(f"{opts.txt_db}/{split['val_idx']}"))
            if task == 'mlm':
                if isinstance(video_db, list):
                    train_dset = ConcatDataset([
                        VideoMlmDataset(ids,
                                        vid_db,
                                        opts.mask_prob,
                                        sub_ctx_len=opts.sub_ctx_len)
                        for ids, vid_db in zip(all_train_ids, video_db)
                    ])
                    val_dset = VideoMlmDataset(val_ids,
                                               video_db[0],
                                               opts.mask_prob,
                                               sub_ctx_len=opts.sub_ctx_len)
                else:
                    train_dset = VideoMlmDataset(train_ids,
                                                 video_db,
                                                 opts.mask_prob,
                                                 sub_ctx_len=opts.sub_ctx_len)
                    val_dset = VideoMlmDataset(val_ids,
                                               video_db,
                                               opts.mask_prob,
                                               sub_ctx_len=opts.sub_ctx_len)
                train_collate = mlm_collate
                val_collate = mlm_collate
            elif task == 'mfm-nce' or task == 'mffr':
                if isinstance(video_db, list):
                    train_dset = ConcatDataset([
                        MfmDataset(ids, vid_db, opts.mask_prob)
                        for ids, vid_db in zip(all_train_ids, video_db)
                    ])
                    val_dset = MfmDataset(val_ids, video_db[0], opts.mask_prob)
                else:
                    train_dset = MfmDataset(train_ids, video_db,
                                            opts.mask_prob)
                    val_dset = MfmDataset(val_ids, video_db, opts.mask_prob)
                train_collate = mfm_collate
                val_collate = mfm_collate
            elif task == 'fom':
                if isinstance(video_db, list):
                    train_dset = ConcatDataset([
                        FomDataset(ids, vid_db, opts.mask_prob)
                        for ids, vid_db in zip(all_train_ids, video_db)
                    ])
                    val_dset = FomEvalDataset(val_ids, video_db[0],
                                              opts.mask_prob)
                else:
                    train_dset = FomDataset(train_ids, video_db,
                                            opts.mask_prob)
                    val_dset = FomEvalDataset(val_ids, video_db,
                                              opts.mask_prob)
                train_collate = fom_collate
                val_collate = fom_eval_collate
            elif task == 'vsm':
                if isinstance(video_db, list):
                    train_dset = ConcatDataset([
                        VsmDataset(ids, vid_db, sub_ctx_len=opts.sub_ctx_len)
                        for ids, vid_db in zip(all_train_ids, video_db)
                    ])
                    val_dset = VsmDataset(val_ids,
                                          video_db[0],
                                          sub_ctx_len=opts.sub_ctx_len)
                else:
                    train_dset = VsmDataset(train_ids,
                                            video_db,
                                            sub_ctx_len=opts.sub_ctx_len)
                    val_dset = VsmDataset(val_ids,
                                          video_db,
                                          sub_ctx_len=opts.sub_ctx_len)
                train_collate = vsm_collate
                val_collate = vsm_collate
            else:
                raise ValueError(f'undefined task {task}')
            train_loader = DataLoader(train_dset,
                                      batch_size=opts.train_batch_size,
                                      num_workers=opts.n_workers,
                                      pin_memory=opts.pin_mem,
                                      collate_fn=train_collate,
                                      shuffle=True)
            val_loader = DataLoader(val_dset,
                                    batch_size=opts.val_batch_size,
                                    num_workers=opts.n_workers,
                                    pin_memory=opts.pin_mem,
                                    collate_fn=val_collate,
                                    shuffle=False)
            train_loaders[name] = (train_loader, ratio)
            val_loaders[name] = PrefetchLoader(val_loader)
    return train_loaders, val_loaders
Example #25
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    opts.rank = rank
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))

    if opts.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, "
                         "should be >= 1".format(
                            opts.gradient_accumulation_steps))

    set_random_seed(opts.seed)

    if rank == 0:
        save_training_meta(opts)
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        pbar = tqdm(total=opts.num_train_steps)
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
    else:
        LOGGER.disabled = True
        pbar = NoOp()
        model_saver = NoOp()

    all_dbs = [db for datasets in [opts.train_datasets, opts.val_datasets]
               for dset in datasets for db in dset['db']]

    tokenizer = json.load(open(f'{all_dbs[0]}/meta.json'))['bert']
    assert all(tokenizer == json.load(open(f'{db}/meta.json'))['bert']
               for db in all_dbs)

    # build data loaders
    train_dataloaders, all_img_dbs = create_dataloaders(
        opts.train_datasets, True, opts)
    val_dataloaders, _ = create_dataloaders(
        opts.val_datasets, False, opts, all_img_dbs)
    meta_loader = MetaLoader(train_dataloaders,
                             accum_steps=opts.gradient_accumulation_steps,
                             distributed=n_gpu > 1)
    meta_loader = PrefetchLoader(meta_loader)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}
    model = UniterForPretraining.from_pretrained(
        opts.model_config, checkpoint,
        img_dim=IMG_DIM, img_label_dim=IMG_LABEL_DIM)
    model.to(device)
    model.train()
    # make sure every process has same model parameters in the beginning
    broadcast_tensors([p.data for p in model.parameters()], 0)
    set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    task2scaler = {t: i for i, t in enumerate(train_dataloaders.keys())}
    model, optimizer = amp.initialize(model, optimizer,
                                      num_losses=len(task2scaler),
                                      enabled=opts.fp16, opt_level='O2')

    global_step = 0
    LOGGER.info(f"***** Running training with {n_gpu} GPUs *****")
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    # to compute training statistics
    task2loss = {task: RunningMeter(f'loss/{task}')
                 for task in train_dataloaders.keys()}
    # ITM w/ OT
    if opts.itm_ot_lambda > 0:
        for task in train_dataloaders.keys():
            if task.startswith('itm'):
                task2loss[f'{task}_xe'] = RunningMeter(f'loss/{task}_xe')
                task2loss[f'{task}_ot'] = RunningMeter(f'loss/{task}_ot')
                task2loss[f'{task}_ot_pos'] = RunningMeter(
                    f'loss/{task}_ot_pos')
                task2loss[f'{task}_ot_neg'] = RunningMeter(
                    f'loss/{task}_ot_neg')

    n_examples = defaultdict(int)
    n_in_units = defaultdict(int)
    n_loss_units = defaultdict(int)
    grad_norm = 0

    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    optimizer.step()
    for step, (name, batch) in enumerate(meta_loader):
        # forward pass
        n_examples[name] += batch['input_ids'].size(0)
        n_in_units[name] += (batch['attn_masks'] == 1).sum().item()
        task = name.split('_')[0]
        loss = model(batch, task=task, compute_loss=True)
        if task.startswith('itm'):
            # OT
            itm_loss, ot_loss = loss
            n_loss_units[name] += itm_loss.size(0)
            itm_loss = itm_loss.mean()
            if ot_loss is not None:
                ot_pos, ot_neg = ot_loss
                ot_loss = (ot_pos.sum() - ot_neg.sum()
                           ) / (ot_pos.size(0) + ot_neg.size(0))

                # NOTE: beware of empty tensor
                ot_pos = ot_pos.mean().item()
                if not math.isnan(ot_pos):
                    task2loss[f'{name}_ot_pos'](ot_pos)
                ot_neg = ot_neg.mean().item()
                if not math.isnan(ot_neg):
                    task2loss[f'{name}_ot_neg'](ot_neg)

                loss = itm_loss + opts.itm_ot_lambda * ot_loss
                task2loss[f'{name}_xe'](itm_loss.item())
                task2loss[f'{name}_ot'](ot_loss.item())
            else:
                loss = itm_loss
        else:
            n_loss_units[name] += loss.size(0)
            loss = loss.mean()  # loss is not normalized in model

        # backward pass
        delay_unscale = (step+1) % opts.gradient_accumulation_steps != 0
        with amp.scale_loss(loss, optimizer, delay_unscale=delay_unscale,
                            loss_id=task2scaler[name]) as scaled_loss:
            scaled_loss.backward()
            if not delay_unscale:
                # gather gradients from every processes
                # do this before unscaling to make sure every process uses
                # the same gradient scale
                grads = [p.grad.data for p in model.parameters()
                         if p.requires_grad and p.grad is not None]
                all_reduce_and_rescale_tensors(grads, float(1))
        task2loss[name](loss.item())

        # optimizer update and logging
        if (step + 1) % opts.gradient_accumulation_steps == 0:
            global_step += 1

            # learning rate scheduling
            lr_this_step = get_lr_sched(global_step, opts)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

            # log loss
            # NOTE: not gathered across GPUs for efficiency
            TB_LOGGER.log_scaler_dict({ll.name: ll.val
                                       for ll in task2loss.values()
                                       if ll.val is not None})
            TB_LOGGER.step()

            # update model params
            if opts.grad_norm != -1:
                grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                            opts.grad_norm)
                TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
            optimizer.step()
            optimizer.zero_grad()
            pbar.update(1)

            if global_step % 100 == 0:
                # monitor training throughput
                LOGGER.info(f'==============Step {global_step}===============')
                for t in train_dataloaders.keys():
                    assert all(tt == t for tt in all_gather_list(t))
                    tot_ex = sum(all_gather_list(n_examples[t]))
                    ex_per_sec = int(tot_ex / (time()-start))
                    tot_in = sum(all_gather_list(n_in_units[t]))
                    in_per_sec = int(tot_in / (time()-start))
                    tot_l = sum(all_gather_list(n_loss_units[t]))
                    l_per_sec = int(tot_l / (time()-start))
                    LOGGER.info(f'{t}: {tot_ex} examples trained at '
                                f'{ex_per_sec} ex/s')
                    TB_LOGGER.add_scalar(f'perf/{t}_ex_per_s', ex_per_sec,
                                         global_step)
                    TB_LOGGER.add_scalar(f'perf/{t}_in_per_s', in_per_sec,
                                         global_step)
                    TB_LOGGER.add_scalar(f'perf/{t}_loss_per_s', l_per_sec,
                                         global_step)
                LOGGER.info('===============================================')

            if global_step % opts.valid_steps == 0:
                LOGGER.info(f'Step {global_step}: start validation')
                validate(model, val_dataloaders)
                model_saver.save(model, global_step)
        if global_step >= opts.num_train_steps:
            break
    if global_step % opts.valid_steps != 0:
        LOGGER.info(f'Step {global_step}: start validation')
        validate(model, val_dataloaders)
        model_saver.save(model, global_step)
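The per-step learning-rate assignment in these loops suggests `get_lr_sched` implements the warmup-then-linear-decay schedule common in BERT-style training. A sketch under that assumption (`opts.learning_rate` and `opts.warmup_steps` are assumed names; `opts.num_train_steps` appears above):

def get_lr_sched_sketch(global_step, opts):
    # linear warmup to the peak LR, then linear decay to zero
    if global_step < opts.warmup_steps:
        return opts.learning_rate * global_step / opts.warmup_steps
    remaining = opts.num_train_steps - global_step
    return max(0.0, opts.learning_rate * remaining
               / (opts.num_train_steps - opts.warmup_steps))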
Example #26
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))
    if rank != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))

    assert opts.split in opts.img_db and opts.split in opts.txt_db
    # load DBs and image dirs
    eval_img_db, eval_img_db_gt = load_img_feat(opts.img_db, model_opts)
    eval_txt_db = VcrTxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VcrEvalDataset(
        "test", eval_txt_db, img_db=eval_img_db,
        img_db_gt=eval_img_db_gt)

    # Prepare model
    model = UniterForVisualCommonsenseReasoning.from_pretrained(
        f'{opts.output_dir}/log/model.json', state_dict={},
        img_dim=IMG_DIM)
    model.init_type_embedding()
    model.init_word_embedding(NUM_SPECIAL_TOKENS)
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    state_dict = checkpoint.get('model_state', checkpoint)
    matched_state_dict = {}
    unexpected_keys = set()
    missing_keys = set()
    for name, param in model.named_parameters():
        missing_keys.add(name)
    for key, data in state_dict.items():
        if key in missing_keys:
            matched_state_dict[key] = data
            missing_keys.remove(key)
        else:
            unexpected_keys.add(key)
    LOGGER.info(f"Unexpected_keys: {list(unexpected_keys)}")
    LOGGER.info(f"Missing_keys: {list(missing_keys)}")
    model.load_state_dict(matched_state_dict, strict=False)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 shuffle=False,
                                 collate_fn=vcr_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    _, results = evaluate(model, eval_dataloader)
    result_dir = f'{opts.output_dir}/results_{opts.split}'
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results = {}
    for id2res in all_gather_list(results):
        all_results.update(id2res)
    if hvd.rank() == 0:
        with open(f'{result_dir}/'
                  f'results_{opts.checkpoint}_all.json', 'w') as f:
            json.dump(all_results, f)
        probs_df = save_for_submission(
            f'{result_dir}/results_{opts.checkpoint}_all.json')
        probs_df.to_csv(f'{result_dir}/results_{opts.checkpoint}_all.csv')
Example #27
File: inf_vqa.py  Project: dandelin/UNITER
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    hps_file = f"{opts.output_dir}/log/hps.json"
    model_opts = Struct(json.load(open(hps_file)))

    # train_examples = None
    ans2label_file = f"{opts.output_dir}/ckpt/ans2label.json"
    ans2label = json.load(open(ans2label_file))
    label2ans = {label: ans for ans, label in ans2label.items()}

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(
        opts.img_db,
        model_opts.conf_th,
        model_opts.max_bb,
        model_opts.min_bb,
        model_opts.num_bb,
        opts.compressed_db,
    )
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VqaEvalDataset(len(ans2label), eval_txt_db, eval_img_db)

    # Prepare model
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f"{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt"
    checkpoint = torch.load(ckpt_file)
    model = UniterForVisualQuestionAnswering.from_pretrained(
        f"{opts.output_dir}/log/model.json",
        checkpoint,
        img_dim=IMG_DIM,
        num_answer=len(ans2label),
    )
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level="O2")

    sampler = TokenBucketSampler(
        eval_dataset.lens,
        bucket_size=BUCKET_SIZE,
        batch_size=opts.batch_size,
        droplast=False,
    )
    eval_dataloader = DataLoader(
        eval_dataset,
        batch_sampler=sampler,
        num_workers=opts.n_workers,
        pin_memory=opts.pin_mem,
        collate_fn=vqa_eval_collate,
    )
    eval_dataloader = PrefetchLoader(eval_dataloader)

    val_log, results, logits = evaluate(model, eval_dataloader, label2ans,
                                        opts.save_logits)
    result_dir = f"{opts.output_dir}/results_test"
    if not exists(result_dir) and rank == 0:
        os.makedirs(result_dir)

    all_results = list(concat(all_gather_list(results)))
    if opts.save_logits:
        all_logits = {}
        for id2logit in all_gather_list(logits):
            all_logits.update(id2logit)
    if hvd.rank() == 0:
        with open(f"{result_dir}/"
                  f"results_{opts.checkpoint}_all.json", "w") as f:
            json.dump(all_results, f)
        if opts.save_logits:
            np.savez(f"{result_dir}/logits_{opts.checkpoint}_all.npz",
                     **all_logits)
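`TokenBucketSampler` above batches by a token budget instead of a fixed example count, so long and short sequences yield similarly sized compute loads. A simplified sketch (the real sampler's shuffling and drop-last semantics differ in detail; this version sorts inside coarse buckets and packs until `batch_size` tokens):

import random
from torch.utils.data import Sampler


class TokenBucketSamplerSketch(Sampler):
    """Hypothetical length-aware batch sampler: each emitted batch keeps
    max_len_in_batch * n_examples under the token budget."""

    def __init__(self, lens, bucket_size, batch_size, droplast=False):
        self._lens = lens            # per-example token lengths
        self._bucket_size = bucket_size
        self._max_tok = batch_size   # batch_size is a token budget here
        self._droplast = droplast

    def __iter__(self):
        ids = list(range(len(self._lens)))
        random.shuffle(ids)
        batches = []
        for i in range(0, len(ids), self._bucket_size):
            # sort within a coarse bucket so batches stay length-homogeneous
            bucket = sorted(ids[i:i + self._bucket_size],
                            key=lambda j: self._lens[j])
            batch, max_len = [], 0
            for j in bucket:
                new_max = max(max_len, self._lens[j])
                if batch and new_max * (len(batch) + 1) > self._max_tok:
                    batches.append(batch)
                    batch, max_len = [j], self._lens[j]
                else:
                    batch.append(j)
                    max_len = new_max
            if batch and not self._droplast:
                batches.append(batch)
        random.shuffle(batches)
        return iter(batches)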
Example #28
File: inf_vcr.py  Project: jaeyun95/ViLaKC
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))
    if rank != 0:
        LOGGER.disabled = True

    hps_file = f'{opts.output_dir}/log/hps.json'
    model_opts = Struct(json.load(open(hps_file)))

    assert opts.split in opts.img_db and opts.split in opts.txt_db
    # load DBs and image dirs
    eval_img_db, eval_img_db_gt = load_img_feat(opts.img_db, model_opts)
    eval_txt_db = VcrTxtTokLmdb(opts.txt_db, -1)
    eval_dataset = VcrEvalDataset("val",
                                  eval_txt_db,
                                  img_db=eval_img_db,
                                  img_db_gt=eval_img_db_gt)

    # Prepare model
    model = UniterForVisualCommonsenseReasoning.from_pretrained(
        f'{opts.output_dir}/log/model.json', state_dict={}, img_dim=IMG_DIM)
    model.init_type_embedding()
    model.init_type_embedding_know()
    model.init_word_embedding(NUM_SPECIAL_TOKENS)
    if exists(opts.checkpoint):
        ckpt_file = opts.checkpoint
    else:
        ckpt_file = f'{opts.output_dir}/ckpt/model_step_{opts.checkpoint}.pt'
    checkpoint = torch.load(ckpt_file)
    state_dict = checkpoint.get('model_state', checkpoint)
    matched_state_dict = {}
    unexpected_keys = set()
    missing_keys = set()
    for name, param in model.named_parameters():
        missing_keys.add(name)
    for key, data in state_dict.items():
        if key in missing_keys:
            matched_state_dict[key] = data
            missing_keys.remove(key)
        else:
            unexpected_keys.add(key)
    LOGGER.info(f"Unexpected_keys: {list(unexpected_keys)}")
    LOGGER.info(f"Missing_keys: {list(missing_keys)}")
    model.load_state_dict(matched_state_dict, strict=False)
    model.to(device)
    if opts.fp16:
        model = amp.initialize(model, enabled=True, opt_level='O2')

    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=opts.batch_size,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 shuffle=False,
                                 collate_fn=vcr_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    results = evaluate(model, eval_dataloader)

    # write one JSON object per line (JSON Lines)
    output = '/src/vlkaf.json'
    with open(output, "w") as f:
        for item in results:
            f.write(json.dumps(item) + '\n')
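
Since each prediction is written as one JSON object per line, the file can be read back line by line. A tiny reader sketch (the helper name is hypothetical):

import json

def read_jsonl(path):
    # parse one JSON object per non-empty line
    with open(path) as f:
        return [json.loads(line) for line in f if line.strip()]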
Example #29
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(device, n_gpu, hvd.rank(),
                                              opts.fp16))

    if opts.train_config is not None:
        train_opts = Struct(json.load(open(opts.train_config)))
        opts.conf_th = train_opts.conf_th
        opts.max_bb = train_opts.max_bb
        opts.min_bb = train_opts.min_bb
        opts.num_bb = train_opts.num_bb

    # load DBs and image dirs
    eval_img_db = DetectFeatLmdb(opts.img_db, opts.conf_th, opts.max_bb,
                                 opts.min_bb, opts.num_bb, opts.compressed_db)
    eval_txt_db = TxtTokLmdb(opts.txt_db, -1)
    eval_dataset = ItmEvalDataset(eval_txt_db, eval_img_db, opts.batch_size)

    # Prepare model
    checkpoint = torch.load(opts.checkpoint)
    model = UniterForImageTextRetrieval.from_pretrained(opts.model_config,
                                                        checkpoint,
                                                        img_dim=IMG_DIM)
    if 'rank_output' not in checkpoint:
        model.init_output()  # zero shot setting

    model.to(device)
    model = amp.initialize(model, enabled=opts.fp16, opt_level='O2')

    # batch_size=1: each ItmEvalDataset item is already a mini-batch of
    # image-text pairs (the dataset was built with opts.batch_size above)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=1,
                                 num_workers=opts.n_workers,
                                 pin_memory=opts.pin_mem,
                                 collate_fn=itm_eval_collate)
    eval_dataloader = PrefetchLoader(eval_dataloader)

    eval_log, results = evaluate(model, eval_dataloader)
    if hvd.rank() == 0:
        if not exists(opts.output_dir):
            os.makedirs(opts.output_dir)
        with open(f'{opts.output_dir}/config.json', 'w') as f:
            json.dump(vars(opts), f)
        with open(f'{opts.output_dir}/results.bin', 'wb') as f:
            pickle.dump(results, f)
        with open(f'{opts.output_dir}/scores.json', 'w') as f:
            json.dump(eval_log, f)
        LOGGER.info('evaluation finished')
        LOGGER.info(
            f"======================== Results =========================\n"
            f"image retrieval R1: {eval_log['img_r1']*100:.2f},\n"
            f"image retrieval R5: {eval_log['img_r5']*100:.2f},\n"
            f"image retrieval R10: {eval_log['img_r10']*100:.2f}\n"
            f"text retrieval R1: {eval_log['txt_r1']*100:.2f},\n"
            f"text retrieval R5: {eval_log['txt_r5']*100:.2f},\n"
            f"text retrieval R10: {eval_log['txt_r10']*100:.2f}")
        LOGGER.info("========================================================")
Example #30
def main(opts):
    # single-GPU training in this example; device was previously undefined
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    os.makedirs(opts.output_dir, exist_ok=True)
    os.makedirs(join(opts.output_dir, 'ckpt'), exist_ok=True)
    model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))

    # train
    train_dataset = MemeAIDataset(json_path='/home/data/meme_json/train.json',
                                  npz_folder='/home/data/faster_cnn_feature/',
                                  mode='train')
    train_loader = DataLoader(train_dataset,
                              batch_size=opts.train_batch_size,
                              shuffle=True,
                              num_workers=opts.n_workers,
                              collate_fn=collate_fn)
    train_loader = PrefetchLoader(train_loader)

    # val
    val_dataset = MemeAIDataset(json_path='/home/data/meme_json/dev.json',
                                npz_folder='/home/data/faster_cnn_feature/',
                                mode='val')
    val_loader = DataLoader(val_dataset,
                            batch_size=opts.inf_minibatch_size,
                            shuffle=False,
                            num_workers=opts.n_workers,
                            collate_fn=collate_fn)
    val_loader = PrefetchLoader(val_loader)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}

    model = Meme.from_pretrained(opts.model_config,
                                 state_dict=checkpoint,
                                 img_dim=IMG_DIM)
    model.init_output()  # pretrain ITM head is different from ranking head
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=opts.learning_rate)

    for epoch in range(opts.epoch):
        print('epoch {}/{}'.format(epoch, opts.epoch))
        pbar = tqdm(total=len(train_loader))

        model.train()
        preds = None
        gt = None

        for step, batch in enumerate(train_loader):
            x = batch[0]
            x['input_ids'] = x['input_ids'].to(device)
            x['position_ids'] = x['position_ids'].to(device)
            x['img_feat'] = x['img_feat'].to(device)
            x['img_pos_feat'] = x['img_pos_feat'].to(device)
            x['attn_masks'] = x['attn_masks'].to(device)
            x['gather_index'] = x['gather_index'].to(device)
            y = batch[1].to(device)

            pred = model(x)

            # detach before accumulating so the autograd graph is not kept alive
            if preds is None:
                preds = torch.sigmoid(pred).detach()
                gt = y
            else:
                preds = torch.cat((preds, torch.sigmoid(pred).detach()), dim=0)
                gt = torch.cat((gt, y), dim=0)

            # equivalent to binary_cross_entropy(sigmoid(pred), y), but
            # numerically stabler on raw logits
            loss = F.binary_cross_entropy_with_logits(pred, y)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            pbar.update(1)

        model.eval()
        preds = preds.cpu().numpy().reshape(-1)
        gt = gt.cpu().numpy()
        roc = roc_auc_score(gt, preds)
        acc = accuracy_score(gt, np.around(preds))

        train_log = {'train/roc': roc, 'train/acc': acc}
        val_log = validate(model, val_loader)

        LOGGER.info(train_log)
        LOGGER.info(val_log)

        model_saver.save(model, epoch)
        pbar.close()
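
validate() is called above but not shown in this example. A minimal sketch, under the assumption that it mirrors the training-side ROC/accuracy computation and that the val loader yields the same (x, y) batches:

import numpy as np
import torch
from sklearn.metrics import roc_auc_score, accuracy_score

@torch.no_grad()
def validate(model, val_loader):
    model.eval()
    device = next(model.parameters()).device
    all_preds, all_gt = [], []
    for x, y in val_loader:
        # same per-field device transfer as in the training loop
        for key in ('input_ids', 'position_ids', 'img_feat', 'img_pos_feat',
                    'attn_masks', 'gather_index'):
            x[key] = x[key].to(device)
        pred = model(x)
        all_preds.append(torch.sigmoid(pred).cpu())
        all_gt.append(y.cpu())
    preds = torch.cat(all_preds).numpy().reshape(-1)
    gt = torch.cat(all_gt).numpy()
    return {'val/roc': roc_auc_score(gt, preds),
            'val/acc': accuracy_score(gt, np.around(preds))}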