コード例 #1
0
def main(_A: argparse.Namespace):

    # Get the current device as set for current distributed process.
    # Check `launch` function in `virtex.utils.distributed` module.
    device = torch.cuda.current_device()

    # Local process group is needed for detectron2.
    pg = list(range(dist.get_world_size()))
    d2.utils.comm._LOCAL_PROCESS_GROUP = torch.distributed.new_group(pg)

    # Create a config object (this will be immutable) and perform common setup
    # such as logging and setting up serialization directory.
    import pdb
    pdb.set_trace()
    if _A.weight_init == "imagenet":
        _A.config_override.extend(["MODEL.VISUAL.PRETRAINED", True])
    _C = Config(_A.config, _A.config_override)

    # We use `default_setup` from detectron2 to do some common setup, such as
    # logging, setting up serialization etc. For more info, look into source.
    _D2C = build_detectron2_config(_C, _A)
    default_setup(_D2C, _A)

    # Prepare weights to pass in instantiation call of trainer.
    if _A.weight_init in {"virtex", "torchvision"}:
        if _A.resume:
            # If resuming training, let detectron2 load weights by providing path.
            model = None
            weights = _A.checkpoint_path
        else:
            # Load backbone weights from VirTex pretrained checkpoint.
            model = PretrainingModelFactory.from_config(_C)
            if _A.weight_init == "virtex":
                CheckpointManager(model=model).load(_A.checkpoint_path)
            else:
                model.visual.cnn.load_state_dict(
                    torch.load(_A.checkpoint_path, map_location="cpu")["state_dict"],
                    strict=False,
                )
            weights = model.visual.detectron2_backbone_state_dict()
    else:
        # If random or imagenet init, just load weights after initializing model.
        model = PretrainingModelFactory.from_config(_C)
        weights = model.visual.detectron2_backbone_state_dict()

    # Back up pretrain config and model checkpoint (if provided).
    _C.dump(os.path.join(_A.serialization_dir, "pretrain_config.yaml"))
    if _A.weight_init == "virtex" and not _A.resume:
        torch.save(
            model.state_dict(),
            os.path.join(_A.serialization_dir, "pretrain_model.pth"),
        )

    del model
    trainer = DownstreamTrainer(_D2C, weights)
    trainer.test() if _A.eval_only else trainer.train()
コード例 #2
0
ファイル: common.py プロジェクト: kdexd/virtex
def common_setup(_C: Config,
                 _A: argparse.Namespace,
                 job_type: str = "pretrain"):
    r"""
    Setup common stuff at the start of every pretraining or downstream
    evaluation job, all listed here to avoid code duplication. Basic steps:

    1. Fix random seeds and other PyTorch flags.
    2. Set up a serialization directory and loggers.
    3. Log important stuff such as config, process info (useful during
        distributed training).
    4. Save a copy of config to serialization directory.

    .. note::

        It is assumed that multiple processes for distributed training have
        already been launched from outside. Functions from
        :mod:`virtex.utils.distributed` module ae used to get process info.

    Args:
        _C: Config object with all the parameters.
        _A: Argparse command line arguments.
        job_type: Type of job for which setup is to be done; one of
            ``{"pretrain", "downstream"}``.
    """

    # Get process rank and world size (assuming distributed is initialized).
    RANK = dist.get_rank()
    WORLD_SIZE = dist.get_world_size()

    # For reproducibility - refer https://pytorch.org/docs/stable/notes/randomness.html
    torch.manual_seed(_C.RANDOM_SEED)
    torch.backends.cudnn.deterministic = _C.CUDNN_DETERMINISTIC
    torch.backends.cudnn.benchmark = _C.CUDNN_BENCHMARK
    random.seed(_C.RANDOM_SEED)
    np.random.seed(_C.RANDOM_SEED)

    # Create serialization directory and save config in it.
    os.makedirs(_A.serialization_dir, exist_ok=True)
    _C.dump(os.path.join(_A.serialization_dir, f"{job_type}_config.yaml"))

    # Remove default logger, create a logger for each process which writes to a
    # separate log-file. This makes changes in global scope.
    logger.remove(0)
    if dist.get_world_size() > 1:
        logger.add(
            os.path.join(_A.serialization_dir, f"log-rank{RANK}.txt"),
            format="{time} {level} {message}",
        )

    # Add a logger for stdout only for the master process.
    if dist.is_master_process():
        logger.add(sys.stdout,
                   format="<g>{time}</g>: <lvl>{message}</lvl>",
                   colorize=True)

    # Print process info, config and args.
    logger.info(f"Rank of current process: {RANK}. World size: {WORLD_SIZE}")
    logger.info(str(_C))

    logger.info("Command line args:")
    for arg in vars(_A):
        logger.info("{:<20}: {}".format(arg, getattr(_A, arg)))
コード例 #3
0
ファイル: clf_voc07.py プロジェクト: wukexingwukexing/virtex
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    # Create a downstream config object (this will be immutable) and perform
    # common setup such as logging and setting up serialization directory.
    _DOWNC = Config(_A.down_config, _A.down_config_override)
    common_setup(_DOWNC, _A, job_type="downstream")

    # Create a (pretraining) config object and backup in serialization directory.
    _C = Config(_A.config, _A.config_override)
    _C.dump(os.path.join(_A.serialization_dir, "pretrain_config.yaml"))

    # -------------------------------------------------------------------------
    #   INSTANTIATE DATALOADER, MODEL, AND FEATURE EXTRACTOR
    # -------------------------------------------------------------------------

    train_dataset = DownstreamDatasetFactory.from_config(_DOWNC,
                                                         split="trainval")
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=_DOWNC.OPTIM.BATCH_SIZE,
        num_workers=_A.cpu_workers,
        pin_memory=True,
    )
    test_dataset = DownstreamDatasetFactory.from_config(_DOWNC, split="test")
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=_DOWNC.OPTIM.BATCH_SIZE,
        num_workers=_A.cpu_workers,
        pin_memory=True,
    )
    NUM_CLASSES = len(train_dataset.class_names)

    # Initialize from a checkpoint, but only keep the visual module.
    model = PretrainingModelFactory.from_config(_C)

    # Load weights according to the init method, do nothing for `random`, and
    # `imagenet` is already taken care of.
    if _A.weight_init == "virtex":
        ITERATION = CheckpointManager(model=model).load(_A.checkpoint_path)
    elif _A.weight_init == "torchvision":
        # Keep strict=False because this state dict may have weights for
        # last fc layer.
        model.visual.cnn.load_state_dict(
            torch.load(_A.checkpoint_path, map_location="cpu")["state_dict"],
            strict=False,
        )

    model = FeatureExtractor(model,
                             layer_name=_A.layer,
                             flatten_and_normalize=True)
    model = model.to(device).eval()

    # -------------------------------------------------------------------------
    #   EXTRACT FEATURES FOR TRAINING SVMs
    # -------------------------------------------------------------------------

    features_train: List[torch.Tensor] = []
    targets_train: List[torch.Tensor] = []

    features_test: List[torch.Tensor] = []
    targets_test: List[torch.Tensor] = []

    # VOC07 is small, extract all features and keep them in memory.
    with torch.no_grad():
        for batch in tqdm(train_dataloader, desc="Extracting train features:"):
            features = model(batch["image"].to(device))

            features_train.append(features.cpu())
            targets_train.append(batch["label"])

        # Similarly extract test features.
        for batch in tqdm(test_dataloader, desc="Extracting test features:"):
            features = model(batch["image"].to(device))

            features_test.append(features.cpu())
            targets_test.append(batch["label"])

    # Convert batches of features/targets to one large numpy array
    features_train = torch.cat(features_train, dim=0).numpy()
    targets_train = torch.cat(targets_train, dim=0).numpy().astype(np.int32)

    features_test = torch.cat(features_test, dim=0).numpy()
    targets_test = torch.cat(targets_test, dim=0).numpy().astype(np.int32)

    # -------------------------------------------------------------------------
    #   TRAIN AND TEST SVMs WITH EXTRACTED FEATURES
    # -------------------------------------------------------------------------

    input_args: List[Any] = []

    # Iterate over all VOC07 classes and train one-vs-all linear SVMs.
    for cls_idx in range(NUM_CLASSES):
        # fmt: off
        input_args.append((
            features_train,
            targets_train[:, cls_idx],
            features_test,
            targets_test[:, cls_idx],
            train_dataset.class_names[cls_idx],
        ))
        # fmt: on

    pool = mp.Pool(processes=_A.cpu_workers)
    pool_output = pool.map(train_test_single_svm, input_args)

    # -------------------------------------------------------------------------
    #   TENSORBOARD LOGGING (RELEVANT MAINLY FOR weight_init=checkpoint)
    # -------------------------------------------------------------------------

    # Tensorboard writer for logging mAP scores. This is useful especially
    # when weight_init=checkpoint (which maybe be coming from a training job).
    tensorboard_writer = SummaryWriter(log_dir=_A.serialization_dir)

    # Test set mAP for each class, for features from every layer.
    test_map = torch.tensor(pool_output).mean()
    logger.info(f"mAP: {test_map}")

    # Tensorboard logging only when _A.weight_init == "virtex"
    if _A.weight_init == "virtex":
        tensorboard_writer.add_scalars("metrics/voc07_clf", {"mAP": test_map},
                                       ITERATION)
コード例 #4
0
ファイル: clf_linear.py プロジェクト: wukexingwukexing/virtex
def main(_A: argparse.Namespace):

    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device as set for current distributed process.
        # Check `launch` function in `virtex.utils.distributed` module.
        device = torch.cuda.current_device()

    # Create a downstream config object (this will be immutable) and perform
    # common setup such as logging and setting up serialization directory.
    _DOWNC = Config(_A.down_config, _A.down_config_override)
    common_setup(_DOWNC, _A, job_type="downstream")

    # Create a (pretraining) config object and backup in serializaion directory.
    _C = Config(_A.config, _A.config_override)
    _C.dump(os.path.join(_A.serialization_dir, "pretrain_config.yaml"))

    # Get dataset name for tensorboard logging.
    DATASET = _DOWNC.DATA.ROOT.split("/")[-1]

    # Set number of output classes according to dataset:
    NUM_CLASSES_MAPPING = {"imagenet": 1000, "inaturalist": 8142}
    NUM_CLASSES = NUM_CLASSES_MAPPING[DATASET]

    # -------------------------------------------------------------------------
    #   INSTANTIATE DATALOADER, MODEL, OPTIMIZER, SCHEDULER
    # -------------------------------------------------------------------------
    train_dataset = DownstreamDatasetFactory.from_config(_DOWNC, split="train")
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=_DOWNC.OPTIM.BATCH_SIZE // dist.get_world_size(),
        num_workers=_A.cpu_workers,
        sampler=DistributedSampler(
            train_dataset,
            num_replicas=dist.get_world_size(),
            rank=dist.get_rank(),
            shuffle=True,
        ),
        drop_last=False,
        pin_memory=True,
        collate_fn=train_dataset.collate_fn,
    )
    val_dataset = DownstreamDatasetFactory.from_config(_DOWNC, split="val")
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=_DOWNC.OPTIM.BATCH_SIZE // dist.get_world_size(),
        num_workers=_A.cpu_workers,
        sampler=DistributedSampler(
            val_dataset,
            num_replicas=dist.get_world_size(),
            rank=dist.get_rank(),
            shuffle=False,
        ),
        pin_memory=True,
        drop_last=False,
        collate_fn=val_dataset.collate_fn,
    )
    # Initialize model using pretraining config.
    pretrained_model = PretrainingModelFactory.from_config(_C)

    # Load weights according to the init method, do nothing for `random`, and
    # `imagenet` is already taken care of.
    if _A.weight_init == "virtex":
        CheckpointManager(model=pretrained_model).load(_A.checkpoint_path)
    elif _A.weight_init == "torchvision":
        # Keep strict=False because this state dict may have weights for
        # last fc layer.
        pretrained_model.visual.cnn.load_state_dict(
            torch.load(_A.checkpoint_path, map_location="cpu")["state_dict"],
            strict=False,
        )

    # Pull out the CNN (torchvision-like) from our pretrained model and add
    # back the FC layer - this is exists in torchvision models, and is set to
    # `nn.Identity()` during pretraining.
    model = pretrained_model.visual.cnn  # type: ignore
    model.fc = nn.Linear(_DOWNC.MODEL.VISUAL.FEATURE_SIZE,
                         NUM_CLASSES).to(device)
    model = model.to(device)

    # Re-initialize the FC layer.
    torch.nn.init.normal_(model.fc.weight.data, mean=0.0, std=0.01)
    torch.nn.init.constant_(model.fc.bias.data, 0.0)

    # Freeze all layers except FC as per config param.
    if _DOWNC.MODEL.VISUAL.FROZEN:
        for name, param in model.named_parameters():
            if "fc" not in name:
                param.requires_grad = False

    # Cross entropy loss and accuracy meter.
    criterion = nn.CrossEntropyLoss()
    top1 = TopkAccuracy(top_k=1)

    optimizer = OptimizerFactory.from_config(_DOWNC, model.named_parameters())
    scheduler = LRSchedulerFactory.from_config(_DOWNC, optimizer)
    del pretrained_model

    # -------------------------------------------------------------------------
    #  BEFORE TRAINING STARTS
    # -------------------------------------------------------------------------

    # Create an iterator from dataloader to sample batches perpetually.
    train_dataloader_iter = cycle(train_dataloader, device)

    # Wrap model and optimizer using NVIDIA Apex for mixed precision training.
    # NOTE: Always do this before wrapping model with DistributedDataParallel.
    if _DOWNC.FP16_OPT > 0:
        from apex import amp

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=f"O{_DOWNC.FP16_OPT}")

    if dist.get_world_size() > 1:
        dist.synchronize()
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[device], find_unused_parameters=True)

    if dist.is_master_process():
        checkpoint_manager = CheckpointManager(
            _A.serialization_dir,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
        )
        tensorboard_writer = SummaryWriter(log_dir=_A.serialization_dir)

    # Keep track of time per iteration and ETA.
    timer = Timer(start_from=1, total_iterations=_DOWNC.OPTIM.NUM_ITERATIONS)

    # -------------------------------------------------------------------------
    #   TRAINING LOOP
    # -------------------------------------------------------------------------
    for iteration in range(1, _DOWNC.OPTIM.NUM_ITERATIONS + 1):
        timer.tic()
        optimizer.zero_grad()
        batch = next(train_dataloader_iter)

        logits = model(batch["image"])
        loss = criterion(logits, batch["label"])

        # Perform dynamic scaling of loss to adjust for mixed precision.
        if _DOWNC.FP16_OPT > 0:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        optimizer.step()
        scheduler.step(iteration)
        timer.toc()

        if iteration % _A.log_every == 0 and dist.is_master_process():
            logger.info(
                f"{timer.stats} | Loss: {loss:.3f} | GPU: {dist.gpu_mem_usage()} MB"
            )
            tensorboard_writer.add_scalar(f"{DATASET}/train_loss", loss,
                                          iteration)
            tensorboard_writer.add_scalar(
                f"{DATASET}/learning_rate",
                optimizer.param_groups[0]["lr"],
                iteration,
            )

        # ---------------------------------------------------------------------
        #   VALIDATION
        # ---------------------------------------------------------------------
        if iteration % _A.checkpoint_every == 0:
            torch.set_grad_enabled(False)
            model.eval()

            total_val_loss = torch.tensor(0.0).to(device)

            for val_iteration, batch in enumerate(val_dataloader, start=1):
                for key in batch:
                    batch[key] = batch[key].to(device)

                logits = model(batch["image"])
                loss = criterion(logits, batch["label"])
                top1(logits, batch["label"])
                total_val_loss += loss

            # Divide each loss component by number of val batches per GPU.
            total_val_loss = total_val_loss / val_iteration
            dist.average_across_processes(total_val_loss)

            # Get accumulated Top-1 accuracy for logging across GPUs.
            acc = top1.get_metric(reset=True)
            dist.average_across_processes(acc)

            torch.set_grad_enabled(True)
            model.train()

            # Save recent checkpoint and best checkpoint based on accuracy.
            if dist.is_master_process():
                checkpoint_manager.step(iteration)

        if iteration % _A.checkpoint_every == 0 and dist.is_master_process():
            logger.info(f"Iter: {iteration} | Top-1 accuracy: {acc})")
            tensorboard_writer.add_scalar(f"{DATASET}/val_loss",
                                          total_val_loss, iteration)
            # This name scoping will result in Tensorboard displaying all metrics
            # (VOC07, caption, etc.) together.
            tensorboard_writer.add_scalars(f"metrics/{DATASET}", {"top1": acc},
                                           iteration)

        # All processes will wait till master process is done logging.
        dist.synchronize()
コード例 #5
0
def main(_A: argparse.Namespace):
    if _A.num_gpus_per_machine == 0:
        # Set device as CPU if num_gpus_per_machine = 0.
        device = torch.device("cpu")
    else:
        # Get the current device (this will be zero here by default).
        device = torch.cuda.current_device()

    # Create a downstream config object (this will be immutable) and perform
    # common setup such as logging and setting up serialization directory.
    _DOWNC = Config(_A.down_config, _A.down_config_override)
    common_setup(_DOWNC, _A, job_type="downstream")

    # Create a (pretraining) config object and backup in serialization directory.
    _C = Config(_A.config, _A.config_override)
    _C.dump(os.path.join(_A.serialization_dir, "pretrain_config.yaml"))

    # -------------------------------------------------------------------------
    #   INSTANTIATE DATALOADER, MODEL, AND FEATURE EXTRACTOR
    # -------------------------------------------------------------------------
    train_dataset = DownstreamDatasetFactory.from_config(_DOWNC, split="trainval", csv=_A.csv)
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=_DOWNC.OPTIM.BATCH_SIZE,
        num_workers=_A.cpu_workers,
        pin_memory=True,
        shuffle=False,
    )

    print(f"train dataset length {len(train_dataset)}")
    # Initialize from a checkpoint, but only keep the visual module.
    model = PretrainingModelFactory.from_config(_C)

    # Load weights according to the init method, do nothing for `random`, and
    # `imagenet` is already taken care of.
    if _A.weight_init == "virtex":
        ITERATION = CheckpointManager(model=model).load(_A.checkpoint_path)
    elif _A.weight_init == "torchvision":
        # Keep strict=False because this state dict may have weights for
        # last fc layer.
        model.visual.cnn.load_state_dict(
            torch.load(_A.checkpoint_path, map_location="cpu")["state_dict"],
            strict=False,
        )

    model = FeatureExtractor(model, layer_name=_A.layer, flatten_and_normalize=True)
    model = model.to(device).eval()

    # -------------------------------------------------------------------------
    #   EXTRACT FEATURES FOR TRAINING SVMs
    # -------------------------------------------------------------------------

    features_train: List[torch.Tensor] = []
    targets_train: List[torch.Tensor] = []
    print("input csv is {}".format(_A.csv))

    # VOC07 is small, extract all features and keep them in memory.
    count = 0
    with torch.no_grad():
        for batch in tqdm(train_dataloader, desc="Extracting train features:"):
            # if count<=4000:
            #     count+=1
            #     continue
            features = model(batch["image"].to(device))
            count += 1
            print("train features has shape {}, video_id {}".format(features.shape, batch['image_id']))
            if count % 1000 == 0:

                torch.save(features_train, "./features_phoenix/features_{}_{}.pt".format(_A.mode, count))
                features_train = []
            features_train.append(features.cpu())

    torch.save(features_train, "./features_phoenix/features_end_{}.pt".format(_A.mode))
    print('finished saving features')