Ejemplo n.º 1
0
def main(opts):
    hvd.init()
    n_gpu = hvd.size()
    device = torch.device("cuda", hvd.local_rank())
    torch.cuda.set_device(hvd.local_rank())
    rank = hvd.rank()
    opts.rank = rank

    
    LOGGER.info("device: {} n_gpu: {}, rank: {}, "
                "16-bits training: {}".format(
                    device, n_gpu, hvd.rank(), opts.fp16))
    device = torch.device("cuda:1")
    if opts.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, "
                         "should be >= 1".format(
                            opts.gradient_accumulation_steps))

    set_random_seed(opts.seed)

    if hvd.rank() == 0:
        TB_LOGGER.create(join(opts.output_dir, 'log'))
        os.makedirs(join(opts.output_dir, 'ckpt'))
        save_training_meta(opts)
        # TB_LOGGER.create(join(opts.output_dir, 'log'))
        model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))
        add_log_to_file(join(opts.output_dir, 'log', 'log.txt'))
        # store ITM predictions
        os.makedirs(join(opts.output_dir, 'results_val'))
        os.makedirs(join(opts.output_dir, 'results_test'))
        os.makedirs(join(opts.output_dir, 'results_train'))
    else:
        LOGGER.disabled = True
        model_saver = NoOp()

    # load DBs and image dirs
    all_img_dbs = ImageLmdbGroup(opts.conf_th, opts.max_bb, opts.min_bb,
                                 opts.num_bb, opts.compressed_db)
    # train
    train_dataset = MemeAIDataset(json_path = '/home/data/meme_json/train.json',
                                    npz_folder = '/home/data/faster_cnn_feature/', 
                                    mode = 'train')
    train_loader =  DataLoader(train_dataset, 
                                    batch_size = opts.train_batch_size, 
                                    shuffle = True, 
                                    num_workers = opts.n_workers,
                                    collate_fn=collate_fn)
    train_loader = PrefetchLoader(train_loader)

    # val
    val_dataset = MemeAIDataset(json_path = '/home/data/meme_json/dev.json',
                                    npz_folder = '/home/data/faster_cnn_feature/', 
                                    mode = 'val')
    val_loader =  DataLoader(val_dataset, 
                                    batch_size = opts.inf_minibatch_size, 
                                    shuffle = False, 
                                    num_workers = opts.n_workers,
                                    collate_fn=collate_fn)
    val_loader = PrefetchLoader(val_loader)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}

    model = Meme.from_pretrained(
        opts.model_config, state_dict=checkpoint,
        img_dim=IMG_DIM)
    model.init_output()  # pretrain ITM head is different from ranking head
    model.to(device)

    # make sure every process has same model parameters in the beginning
    # broadcast_tensors([p.data for p in model.parameters()], 0)
    # set_dropout(model, opts.dropout)

    # Prepare optimizer
    optimizer = build_optimizer(model, opts)
    model, optimizer = amp.initialize(model, optimizer,
                                      enabled=opts.fp16, opt_level='O2')

    global_step = 0
    # LOGGER.info(f"***** Running training on {n_gpu} GPUs *****")
    # LOGGER.info("  Num examples = %d", len(train_dataset) * hvd.size())
    LOGGER.info("  Batch size = %d", opts.train_batch_size)
    LOGGER.info("  Accumulate steps = %d", opts.gradient_accumulation_steps)
    LOGGER.info("  Num steps = %d", opts.num_train_steps)

    running_loss = RunningMeter('loss')
    model.train()

    n_examples = 0
    n_epoch = 0
    start = time()
    # quick hack for amp delay_unscale bug
    optimizer.zero_grad()
    optimizer.step()
    # while True:
    for epoch in range(opts.epoch):
        print('epoch {}/ {}'.format(epoch, opts.epoch))
        pbar = tqdm(total=len(train_loader))

        model.train()
        preds = None
        gt = None

        for step, batch in enumerate(train_loader):
            x = batch[0]
            y = batch[1]
            n_examples += x['input_ids'].size(0)

            pred = model(x)

            if preds is None:

                preds = torch.sigmoid(pred)
                gt = y
            else:
                preds = torch.cat((preds, torch.sigmoid(pred)), dim = 0)
                gt = torch.cat((gt, y), dim = 0)


            loss = F.binary_cross_entropy(torch.sigmoid(pred), y)

            delay_unscale = (step+1) % opts.gradient_accumulation_steps != 0
            with amp.scale_loss(loss, optimizer, delay_unscale=delay_unscale
                                ) as scaled_loss:
                scaled_loss.backward()
                if not delay_unscale:
                    # gather gradients from every processes
                    # do this before unscaling to make sure every process uses
                    # the same gradient scale
                    grads = [p.grad.data for p in model.parameters()
                             if p.requires_grad and p.grad is not None]
                    all_reduce_and_rescale_tensors(grads, float(1))

            running_loss(loss.item())

            if (step + 1) % opts.gradient_accumulation_steps == 0:
                global_step += 1

                # learning rate scheduling
                lr_this_step = get_lr_sched(global_step, opts)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

                # log loss
                # NOTE: not gathered across GPUs for efficiency
                TB_LOGGER.add_scalar('loss', running_loss.val, global_step)
                TB_LOGGER.step()

                # update model params
                if opts.grad_norm != -1:
                    grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                                opts.grad_norm)
                    TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
                optimizer.step()
                optimizer.zero_grad()


        global_step += 1

        # learning rate scheduling
        lr_this_step = get_lr_sched(global_step, opts)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_this_step
        TB_LOGGER.add_scalar('lr', lr_this_step, global_step)

        # log loss
        # NOTE: not gathered across GPUs for efficiency
        TB_LOGGER.add_scalar('loss', running_loss.val, global_step)
        TB_LOGGER.step()

        # update model params
        if opts.grad_norm != -1:
            grad_norm = clip_grad_norm_(amp.master_params(optimizer),
                                        opts.grad_norm)
            TB_LOGGER.add_scalar('grad_norm', grad_norm, global_step)
        optimizer.step()
        optimizer.zero_grad()

        with torch.no_grad():
            preds = preds.cpu().numpy().reshape(len(preds), )
            gt = gt.cpu().numpy()
            roc = roc_auc_score(gt, preds)
            acc = accuracy_score(gt, np.around(preds)) 
        train_log = {'train/roc': roc, 'train/acc': acc}
        TB_LOGGER.log_scaler_dict({f"train/{k}": v for k, v in train_log.items()})

        # monitor training throughput

        val_log = validate(model, val_loader)
        TB_LOGGER.log_scaler_dict({f"valid/{k}": v for k, v in val_log.items()})

        LOGGER.info(train_log)
        LOGGER.info(val_log)

        model_saver.save(model, global_step)

        pbar.close()
Ejemplo n.º 2
0
def main(opts):

    os.makedirs(opts.output_dir)
    os.makedirs(join(opts.output_dir, 'ckpt'))
    model_saver = ModelSaver(join(opts.output_dir, 'ckpt'))

    # train
    train_dataset = MemeAIDataset(json_path='/home/data/meme_json/train.json',
                                  npz_folder='/home/data/faster_cnn_feature/',
                                  mode='train')
    train_loader = DataLoader(train_dataset,
                              batch_size=opts.train_batch_size,
                              shuffle=True,
                              num_workers=opts.n_workers,
                              collate_fn=collate_fn)
    train_loader = PrefetchLoader(train_loader)

    # val
    val_dataset = MemeAIDataset(json_path='/home/data/meme_json/dev.json',
                                npz_folder='/home/data/faster_cnn_feature/',
                                mode='val')
    val_loader = DataLoader(val_dataset,
                            batch_size=opts.inf_minibatch_size,
                            shuffle=False,
                            num_workers=opts.n_workers,
                            collate_fn=collate_fn)
    val_loader = PrefetchLoader(val_loader)

    # Prepare model
    if opts.checkpoint:
        checkpoint = torch.load(opts.checkpoint)
    else:
        checkpoint = {}

    model = Meme.from_pretrained(opts.model_config,
                                 state_dict=checkpoint,
                                 img_dim=IMG_DIM)
    model.init_output()  # pretrain ITM head is different from ranking head
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=opts.learning_rate)

    for epoch in range(opts.epoch):
        print('epoch {}/ {}'.format(epoch, opts.epoch))
        pbar = tqdm(total=len(train_loader))

        model.train()
        preds = None
        gt = None

        for step, batch in enumerate(train_loader):
            x = batch[0]
            x['input_ids'] = x['input_ids'].to(device)
            x['position_ids'] = x['position_ids'].to(device)
            x['img_feat'] = x['img_feat'].to(device)
            x['img_pos_feat'] = x['img_pos_feat'].to(device)
            x['attn_masks'] = x['attn_masks'].to(device)
            x['gather_index'] = x['gather_index'].to(device)
            y = batch[1].to(device)

            pred = model(x)

            if preds is None:
                preds = torch.sigmoid(pred)
                gt = y
            else:
                preds = torch.cat((preds, torch.sigmoid(pred)), dim=0)
                gt = torch.cat((gt, y), dim=0)

            loss = F.binary_cross_entropy(torch.sigmoid(pred), y)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            pbar.update(1)

        model.eval()
        with torch.no_grad():
            preds = preds.detach().cpu().numpy().reshape(len(preds), )
            gt = gt.cpu().numpy()
            roc = roc_auc_score(gt, preds)
            acc = accuracy_score(gt, np.around(preds))

        train_log = {'train/roc': roc, 'train/acc': acc}
        val_log = validate(model, val_loader)

        LOGGER.info(train_log)
        LOGGER.info(val_log)

        model_saver.save(model, epoch)
        pbar.close()