Example #1
0
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    _C = Config(_A.config, _A.config_override)

    tokenizer = TokenizerFactory.from_config(_C)

    if _A.data_root is None:
        _A.data_root = os.path.join(_C.DATA.ROOT, "val2017")

    val_dataloader = DataLoader(
        ImageDirectoryDataset(_A.data_root),
        batch_size=_C.OPTIM.BATCH_SIZE,
        num_workers=_A.cpu_workers,
        pin_memory=True,
    )
    # Initialize model from a checkpoint.
    model = PretrainingModelFactory.from_config(_C).to(device)
    ITERATION = CheckpointManager(model=model).load(_A.checkpoint_path)
    model.eval()

    # Make a list of predictions to evaluate.
    predictions: List[Dict[str, Any]] = []

    for val_iteration, val_batch in enumerate(val_dataloader, start=1):

        val_batch["image"] = val_batch["image"].to(device)
        with torch.no_grad():
            output_dict = model(val_batch)

        # Make a dictionary of predictions in COCO format.
        for image_id, caption in zip(val_batch["image_id"],
                                     output_dict["predictions"]):
            predictions.append({
                "image_id": image_id.item(),
                "caption": tokenizer.decode(caption.tolist()),
            })

    # Save predictions as a JSON file if specified.
    if _A.output is not None:
        os.makedirs(os.path.dirname(_A.output), exist_ok=True)
        json.dump(predictions, open(_A.output, "w"))
        logger.info(f"Saved predictions to {_A.output}")

    # Calculate CIDEr and SPICE metrics using ground truth COCO Captions. This
    # should be skipped when running inference on arbitrary images.
    if _A.calc_metrics:
        # Assume ground truth (COCO val2017 annotations) exist.
        gt = os.path.join(_C.DATA.ROOT, "annotations", "captions_val2017.json")

        metrics = CocoCaptionsEvaluator(gt).evaluate(predictions)
        logger.info(f"Iter: {ITERATION} | Metrics: {metrics}")
Example #2
0
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    _C = Config(_A.config, _A.config_override)

    tokenizer = TokenizerFactory.from_config(_C)
    val_dataloader = DataLoader(
        CocoCaptionsEvalDataset(_C.DATA.ROOT),
        batch_size=_C.OPTIM.BATCH_SIZE,
        num_workers=_A.cpu_workers,
        pin_memory=True,
    )
    # Initialize model from a checkpoint.
    model = PretrainingModelFactory.from_config(_C).to(device)
    ITERATION = CheckpointManager(model=model).load(_A.checkpoint_path)
    model.eval()

    # Make a list of predictions to evaluate.
    predictions: List[Dict[str, Any]] = []

    for val_iteration, val_batch in enumerate(val_dataloader, start=1):
        for key in val_batch:
            val_batch[key] = val_batch[key].to(device)

        # Make a dictionary of predictions in COCO format.
        with torch.no_grad():
            output_dict = model(val_batch)

        for image_id, caption in zip(val_batch["image_id"],
                                     output_dict["predictions"]):
            predictions.append({
                "image_id": image_id.item(),
                "caption": tokenizer.decode(caption.tolist),
            })

    # Assume ground truth (COCO val2017 annotations) exist.
    gt = os.path.join(_C.DATA.ROOT, "annotations", "captions_val2017.json")

    metrics = CocoCaptionsEvaluator(gt).evaluate(predictions)
    logger.info(f"Iter: {ITERATION} | Metrics: {metrics}")
Example #3
0
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    _C = Config(_A.config, _A.config_override)

    tokenizer = TokenizerFactory.from_config(_C)

    if _A.data_root is None:
        _A.data_root = os.path.join(_C.DATA.ROOT, "val2017")

    val_dataloader = DataLoader(
        ImageDirectoryDataset(_A.data_root),
        batch_size=16,
        num_workers=_A.cpu_workers,
        pin_memory=True,
        shuffle=True,
    )
    # Initialize model from a checkpoint.
    model = PretrainingModelFactory.from_config(_C).to(device)
    ITERATION = CheckpointManager(model=model).load(_A.checkpoint_path)
    model.eval()
    model.sample_on()

    val_batch = next(iter(val_dataloader))
    val_batch['image'] = val_batch['image'].to(device)
    with torch.no_grad():
        output_dict = model(val_batch, sample_mode='greedy')

    for caption in output_dict["predictions"][:, 1:]:
        print(tokenizer.decode(caption.tolist()))

    mean = torch.tensor(IMAGENET_COLOR_MEAN,
                        dtype=torch.float).view(1, 3, 1, 1)
    std = torch.tensor(IMAGENET_COLOR_STD, dtype=torch.float).view(1, 3, 1, 1)
    images = val_batch['image'].cpu() * std + mean

    save_image(images, 'images.png', nrow=4)
Example #4
0
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    _C = Config(_A.config, _A.config_override)

    tokenizer = TokenizerFactory.from_config(_C)

    dataset = PretrainingDatasetFactory.from_config(_C, split='train')

    dataloader = DataLoader(dataset,
                            batch_size=_C.OPTIM.BATCH_SIZE,
                            shuffle=False,
                            num_workers=_A.cpu_workers,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)
    print('dataloader size:', len(dataloader))

    photoids = dict()

    pbar = tqdm(total=len(dataloader))
    for batch in dataloader:
        for image_id, photo_id in zip(batch["image_id"], batch["photo_id"]):
            photoids[image_id.item()] = photo_id.item()
        pbar.update(1)
    pbar.close()

    # Save predictions as a JSON file if specified.
    os.makedirs(os.path.dirname(_A.output), exist_ok=True)
    json.dump(photoids, open(_A.output, "w"))
    logger.info(f"Saved photoids to {_A.output}")
Example #5
0
def main(_A: argparse.Namespace):
    apex = False
    is_cpu = False
    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
        is_cpu = True
    else:
        # Get the current device as set for current distributed process.
        # Check `launch` function in `virtex.utils.distributed` module.
        device = torch.cuda.current_device()

    # Create a config object (this will be immutable) and perform common setup
    # such as logging and setting up serialization directory.
    _C = Config(_A.config, _A.config_override)
    common_setup(_C, _A)

    # -------------------------------------------------------------------------
    #   INSTANTIATE DATALOADER, MODEL, OPTIMIZER
    # -------------------------------------------------------------------------
    tokenizer = TokenizerFactory.from_config(_C)
    train_dataset = PretrainingDatasetFactory.from_config(_C,
                                                          split="train",
                                                          csv=_A.train_csv)
    val_dataset = PretrainingDatasetFactory.from_config(_C,
                                                        split="val",
                                                        csv=_A.val_csv)

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=_C.OPTIM.BATCH_SIZE // dist.get_world_size(),
        #sampler= Sampler(train_dataset),
        sampler=DistributedSampler(train_dataset, shuffle=True),
        num_workers=_A.cpu_workers,
        pin_memory=True,
        drop_last=True,
        collate_fn=train_dataset.collate_fn,
    )
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=_C.OPTIM.BATCH_SIZE // dist.get_world_size(),
        # sampler = Sampler(val_dataset),
        sampler=DistributedSampler(val_dataset, shuffle=False),
        num_workers=_A.cpu_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=val_dataset.collate_fn,
    )

    model = PretrainingModelFactory.from_config(_C).to(device)
    optimizer = OptimizerFactory.from_config(_C, model.named_parameters())
    scheduler = LRSchedulerFactory.from_config(_C, optimizer)

    # -------------------------------------------------------------------------
    #   BEFORE TRAINING STARTS
    # -------------------------------------------------------------------------

    # Load checkpoint to resume training if specified.
    if _A.resume_from is not None:
        start_iteration = CheckpointManager(model=model,
                                            optimizer=optimizer,
                                            scheduler=scheduler).load(
                                                _A.resume_from)
    else:
        start_iteration = 0

    # Keep track of time per iteration and ETA.
    timer = Timer(
        start_from=start_iteration + 1,
        total_iterations=_C.OPTIM.NUM_ITERATIONS,
    )
    # Create an iterator from dataloader to sample batches perpetually.
    train_dataloader_iter = cycle(train_dataloader, device, start_iteration)

    if (not is_cpu):
        # Wrap model and optimizer using NVIDIA Apex for mixed precision training.
        # NOTE: Always do this before wrapping model with DistributedDataParallel.
        if apex:
            if _C.FP16_OPT > 0:
                from apex import amp

                model, optimizer = amp.initialize(model,
                                                  optimizer,
                                                  opt_level=f"O{_C.FP16_OPT}")

        # Wrap model in DDP if using more than one processes.
        if dist.get_world_size() > 1:
            dist.synchronize()
            model = nn.parallel.DistributedDataParallel(
                model, device_ids=[device], find_unused_parameters=True)

        # Create checkpoint manager and tensorboard writer (only in master process).
        if dist.is_master_process():
            checkpoint_manager = CheckpointManager(
                _A.serialization_dir,
                model=model,
                optimizer=optimizer,
                scheduler=scheduler,
            )
            tensorboard_writer = SummaryWriter(log_dir=_A.serialization_dir)
            tensorboard_writer.add_text("config", f"```\n{_C}\n```")

    # -------------------------------------------------------------------------
    #   TRAINING LOOP
    # -------------------------------------------------------------------------
    for iteration in range(start_iteration + 1, _C.OPTIM.NUM_ITERATIONS + 1):
        timer.tic()
        optimizer.zero_grad()

        batch_loss = torch.tensor(0.0, device=device)

        batch = next(train_dataloader_iter)
        output_dict = model(batch)

        loss = output_dict["loss"]
        batch_loss += loss.item()

        # Perform dynamic scaling of loss to adjust for mixed precision.
        if apex and _C.FP16_OPT > 0:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        # Clip norm of gradients before optimizer step.
        torch.nn.utils.clip_grad_norm_(
            amp.master_params(optimizer)
            if apex and _C.FP16_OPT > 0 else model.parameters(),
            _C.OPTIM.CLIP_GRAD_NORM,
        )
        optimizer.step()
        scheduler.step(iteration)
        timer.toc()

        # ---------------------------------------------------------------------
        #   TENSORBOARD LOGGING
        # ---------------------------------------------------------------------
        if iteration % _A.log_every == 0 and dist.is_master_process():
            logger.info(f"{timer.stats} | Loss: {batch_loss:.3f} | "
                        f"GPU mem: {dist.gpu_mem_usage()} MB")
            tensorboard_writer.add_scalars(
                "learning_rate",
                {
                    "visual": optimizer.param_groups[0]["lr"],
                    "common": optimizer.param_groups[-1]["lr"],
                },
                iteration,
            )
            tensorboard_writer.add_scalars("train",
                                           output_dict["loss_components"],
                                           iteration)

        # ---------------------------------------------------------------------
        #   VALIDATION
        # ---------------------------------------------------------------------
        if iteration % _A.checkpoint_every == 0:
            if dist.is_master_process():
                checkpoint_manager.step(iteration)

            torch.set_grad_enabled(False)
            model.eval()

            # Accumulate different val loss components according to the type of
            # pretraining model.
            val_loss_counter: Counter = Counter()

            for val_iteration, val_batch in enumerate(val_dataloader, start=1):
                for key in val_batch:
                    val_batch[key] = val_batch[key].to(device)
                output_dict = model(val_batch)

                val_loss_counter.update(output_dict["loss_components"])

            # Divide each loss component by number of val batches per GPU.
            val_loss_dict = {
                k: v / val_iteration
                for k, v in dict(val_loss_counter).items()
            }
            dist.average_across_processes(val_loss_dict)
            torch.set_grad_enabled(True)
            model.train()

        if iteration % _A.checkpoint_every == 0 and dist.is_master_process():
            logger.info(f"Iter: {iteration} | Val loss: {val_loss_dict}")
            tensorboard_writer.add_scalars("val", val_loss_dict, iteration)

        # All processes will wait till master process is done logging.
        dist.synchronize()
Example #6
0
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    _C = Config(_A.config, _A.config_override)

    tokenizer = TokenizerFactory.from_config(_C)

    if _A.data_root is None:
        _A.data_root = os.path.join(_C.DATA.ROOT, "val2017")

    val_dataset = PretrainingDatasetFactory.from_config(_C, split='val', all_captions=True)
    val_dataset_no_image = PretrainingDatasetFactory.from_config(_C, split='val', all_captions=True, include_image=False)

    val_sampler = (
        DistributedSampler(val_dataset, shuffle=False)
        if _A.num_gpus_per_machine > 0
        else None
    )
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=_C.OPTIM.BATCH_SIZE // dist.get_world_size(),
        sampler=val_sampler,
        shuffle=False,
        num_workers=_A.cpu_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=val_dataset.collate_fn
    )

    val_dataloader_no_image = DataLoader(
        val_dataset_no_image,
        batch_size=_C.OPTIM.BATCH_SIZE // dist.get_world_size(),
        shuffle=False,
        drop_last=False,
        collate_fn=val_dataset.collate_fn
    )

    evaluator = CiderEvaluator(val_dataloader_no_image, prefix='val')

    # Initialize model from a checkpoint.
    model = PretrainingModelFactory.from_config(_C).to(device)
    ITERATION = CheckpointManager(model=model).load(_A.checkpoint_path)
    model.eval()

    # Make a list of predictions to evaluate.
    predictions: List[Dict[str, Any]] = []

    if dist.is_master_process():
        pbar = tqdm(total=len(val_dataloader))
    for val_iteration, val_batch in enumerate(val_dataloader, start=1):
        val_batch = {'image_id': val_batch['image_id'].to(device),
                     'image': val_batch['image'].to(device)}
        with torch.no_grad():
            output_dict = model(val_batch)

        # Make a dictionary of predictions in COCO format.
        for image_id, caption in zip(
            val_batch["image_id"], output_dict["predictions"][:, 1:]
        ):
            predictions.append(
                {
                    # Convert image id to int if possible (mainly for COCO eval).
                    "image_id": image_id.item(),
                    "caption": tokenizer.decode(caption.tolist()),
                }
            )
        if dist.is_master_process():
            pbar.update(1)
    if dist.is_master_process():
        pbar.close()

    # Save predictions as a JSON file if specified.
    if _A.output is not None:
        os.makedirs(os.path.dirname(_A.output), exist_ok=True)
        json.dump(predictions, open(_A.output, "w"))
        logger.info(f"Saved predictions to {_A.output}")

    # Calculate CIDEr and SPICE metrics using ground truth COCO Captions. This
    # should be skipped when running inference on arbitrary images.
    if _A.calc_metrics:

        metrics = evaluator.evaluate(predictions)
        metrics = {k: torch.tensor(v, dtype=torch.float, device=device)
                   for k, v in metrics.items()}
        dist.average_across_processes(metrics)
        metrics = {k: v.item() for k, v in metrics.items()}
        if dist.is_master_process():
            logger.info(f"Iter: {ITERATION} | Metrics: {metrics}")
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    _C = Config(_A.config, _A.config_override)

    tokenizer = TokenizerFactory.from_config(_C)

    dataset = PretrainingDatasetFactory.from_config(_C, split='train')

    sampler = (
        DistributedSampler(dataset, shuffle=False)
        if _A.num_gpus_per_machine > 0
        else None
    )
    val_dataloader = DataLoader(
        dataset,
        batch_size=_C.OPTIM.BATCH_SIZE // dist.get_world_size(),
        sampler=sampler,
        shuffle=False,
        num_workers=_A.cpu_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=dataset.collate_fn
    )

    # Initialize model from a checkpoint.
    model = PretrainingModelFactory.from_config(_C).to(device)
    ITERATION = CheckpointManager(model=model).load(_A.checkpoint_path)
    model.eval()
    torch.set_grad_enabled(False)
    model.sample_on()

    captions = dict()
    
    if dist.is_master_process():
        pbar = tqdm(total=len(val_dataloader))
    for val_iteration, val_batch in enumerate(val_dataloader, start=1):
        val_batch = {'image_id': val_batch['image_id'].to(device),
                     'image': val_batch['image'].to(device)}
        predictions = []
        for k in [1]:
            predictions.append(model(val_batch, sample_mode='beam', n_samples_per_image=k)['predictions'][:, 1:])
        max_length = max([p.shape[1] for p in predictions])
        predictions = [torch.cat((p, torch.zeros(p.shape[0], max_length - p.shape[1], device=device)), dim=1) for p in predictions]
        predictions = torch.stack(predictions, dim=1)

        # Make a dictionary of predictions in COCO format.
        for image_id, caption in zip(
            val_batch["image_id"], predictions
        ):
            captions[image_id.item()] = [tokenizer.decode(c.tolist()).strip() for c in caption]
        if dist.is_master_process():
            pbar.update(1)
    if dist.is_master_process():
        pbar.close()

    # Save predictions as a JSON file if specified.
    folder = os.path.dirname(_A.output)
    os.makedirs(folder, exist_ok=True)

    filename = os.path.basename(_A.output)
    filepath = os.path.join(folder, f'{dist.get_rank()}_{filename}')
    
    json.dump(captions, open(filepath, "w"))
    logger.info(f"Saved predictions to {filepath}")