def get_loss_fn(loss_name: str, cfg: AttrDict):
    # Build the loss on the CPU device.
    set_cpu_device()
    loss_fn = LOSS_REGISTRY[loss_name](cfg)
    # Record the registry name on the instance so it can be looked up later.
    loss_fn.__dict__["loss_name"] = loss_name

    _recursive_register(loss_fn)
    return loss_fn
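
A minimal usage sketch of the helper above. The loss name and the `loss_cfg` variable are illustrative: it is assumed that "simclr_info_nce_loss" is registered in LOSS_REGISTRY and that `loss_cfg` is the matching AttrDict section of a loaded VISSL config.

# Sketch only: "simclr_info_nce_loss" and `loss_cfg` are placeholders.
criterion = get_loss_fn("simclr_info_nce_loss", loss_cfg)
print(criterion.loss_name)  # the registry name recorded by get_loss_fn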
Example #2
    def __init__(
        self,
        use_gpu: Optional[bool] = None,
        num_dataloader_workers: int = 0,
        dataloader_mp_context: Optional[str] = None,
    ):
        """Constructor for DistributedTrainer.

        Args:
            use_gpu: If True, use the GPU assigned to this process (CUDA
                device index LOCAL_RANK) for training. If None, GPUs are
                used whenever they are available.
            num_dataloader_workers: Number of CPU processes doing dataloading
                per GPU. If 0, dataloading is done on the main thread.
            dataloader_mp_context: Determines how new processes are launched
                for dataloading. Must be one of "fork", "forkserver", "spawn".
                If None, the multiprocessing context is inherited from the parent.
        """
        super().__init__(
            use_gpu=use_gpu,
            num_dataloader_workers=num_dataloader_workers,
            dataloader_mp_context=dataloader_mp_context,
        )
        _init_env_vars()
        _init_distributed(self.use_gpu)
        logging.info(
            f"Done setting up distributed process_group with rank {get_rank()}, "
            f"world_size {get_world_size()}"
        )
        local_rank = int(os.environ["LOCAL_RANK"])
        if self.use_gpu:
            logging.info("Using GPU, CUDA device index: {}".format(local_rank))
            set_cuda_device_index(local_rank)
        else:
            logging.info("Using CPU")
            set_cpu_device()
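
A minimal instantiation sketch based on the constructor above. The worker count and mp context are illustrative, `task` stands in for an already-built ClassyTask, and launcher-provided variables such as LOCAL_RANK are assumed to be set in the environment.

# Sketch only: `task` is a placeholder for a configured ClassyTask.
trainer = DistributedTrainer(
    use_gpu=None,              # auto-detect GPUs
    num_dataloader_workers=4,  # illustrative worker count per GPU
    dataloader_mp_context="spawn",
)
trainer.train(task)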
    def __init__(
        self,
        use_gpu,
        num_dataloader_workers,
        elastic_coordinator,
        input_args,
        local_rank,
        dataloader_mp_context=None,
    ):
        super().__init__(
            use_gpu=use_gpu,
            num_dataloader_workers=num_dataloader_workers,
            dataloader_mp_context=dataloader_mp_context,
        )
        pid = os.getpid()
        if use_gpu:
            set_cuda_device_index(local_rank)
            device_idx = torch.cuda.current_device()
            log.info(
                f"initialized worker {local_rank} (pid={pid}, gpu={device_idx})"
            )
            device_properties = torch.cuda.get_device_properties(device_idx)
            log.info(f"gpu device properties: {device_properties}")
        else:
            # cpu
            set_cpu_device()
            log.info(f"initialized worker {local_rank} (pid={pid}, cpu)")

        self.elastic_coordinator = elastic_coordinator
        self.input_args = input_args
Example #4
    def __init__(
        self,
        use_gpu: Optional[bool] = None,
        num_dataloader_workers: int = 0,
        dataloader_mp_context: Optional[str] = None,
    ):
        """Constructor for LocalTrainer.

        Args:
            use_gpu: If True, use GPU 0 for training. If None, GPUs are
                used whenever they are available.
            num_dataloader_workers: Number of CPU processes doing dataloading
                per GPU. If 0, dataloading is done on the main thread.
            dataloader_mp_context: Determines how new processes are launched
                for dataloading. Must be one of "fork", "forkserver", "spawn".
                If None, the multiprocessing context is inherited from the parent.
        """
        super().__init__(
            use_gpu=use_gpu,
            num_dataloader_workers=num_dataloader_workers,
            dataloader_mp_context=dataloader_mp_context,
        )
        if self.use_gpu:
            logging.info("Using GPU, CUDA device index: {}".format(0))
            set_cuda_device_index(0)
        else:
            logging.info("Using CPU")
            set_cpu_device()
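
For comparison with the distributed variant, a sketch of forcing CPU-only training with the LocalTrainer above; `task` is again a placeholder for a configured ClassyTask.

# Sketch only: force CPU training and keep dataloading on the main thread.
trainer = LocalTrainer(use_gpu=False, num_dataloader_workers=0)
trainer.train(task)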
Example #5
    def train(self, task):
        if task.use_gpu:
            logging.info("Using GPU, CUDA device index: {}".format(0))
            set_cuda_device_index(0)
        else:
            logging.info("Using CPU")
            set_cpu_device()

        super().train(task)
    def train(self, task):
        _init_env_vars(task.use_gpu)
        _init_distributed(task.use_gpu)
        logging.info(
            f"Done setting up distributed process_group with rank {get_rank()}, "
            f"world_size {get_world_size()}"
        )
        local_rank = int(os.environ["LOCAL_RANK"])
        if task.use_gpu:
            logging.info("Using GPU, CUDA device index: {}".format(local_rank))
            set_cuda_device_index(local_rank)
        else:
            logging.info("Using CPU")
            set_cpu_device()

        super().train(task)
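
The distributed train() above relies on launcher-provided environment variables (it reads LOCAL_RANK directly). Below is a minimal single-process sketch of those variables; in a real run they come from torchrun or an equivalent launcher, so the values are illustrative.

import os

os.environ.setdefault("RANK", "0")
os.environ.setdefault("LOCAL_RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")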
Example #7
    def setup_distributed(self, use_gpu: bool):
        """
        Setup the distributed training. VISSL support both GPU and CPU only training.
        (1) Initialize the torch.distributed.init_process_group if the distributed is
            not already initialized. The init_method, backend are specified by user in the
            yaml config file. See vissl/defaults.yaml file for description on how to set
            init_method, backend.
        (2) We also set the global cuda device index using torch.cuda.set_device or
            cpu device
        """
        # we overwrite the distributed trainer setup here with our config options
        distributed_world_size = int(os.environ["WORLD_SIZE"])
        assert distributed_world_size % self.cfg.DISTRIBUTED.NUM_NODES == 0
        init_method = f"{self.cfg.DISTRIBUTED.INIT_METHOD}://{self.dist_run_id}"
        logging.info(
            f"Using Distributed init method: {init_method}, "
            f"world_size: {distributed_world_size}, rank: {self.distributed_rank}"
        )

        if not torch.distributed.is_initialized():
            torch.distributed.init_process_group(
                backend=self.cfg.DISTRIBUTED.BACKEND,
                init_method=init_method,
                world_size=distributed_world_size,
                rank=self.distributed_rank,
            )
        else:
            logging.warning(
                "Torch distributed has already been initialized, "
                "reusing existing configuration"
            )

        logging.info(
            "| initialized host {} as rank {} ({})".format(
                socket.gethostname(),
                self.distributed_rank,
                torch.distributed.get_rank(),
            )
        )
        if use_gpu:
            set_cuda_device_index(self.local_rank)
            # perform a dummy all-reduce to initialize the NCCL communicator
            if torch.cuda.is_available() and (self.cfg.DISTRIBUTED.BACKEND == "nccl"):
                dist.all_reduce(torch.zeros(1).cuda())
        else:
            set_cpu_device()
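
A condensed, standalone sketch of the same two steps (process-group initialization, then device selection) using plain torch.distributed. The env:// init method and the gloo/nccl backend choice are illustrative, not taken from the VISSL config.

import os

import torch
import torch.distributed as dist

# (1) Initialize the process group if it is not already initialized.
if not dist.is_initialized():
    backend = "nccl" if torch.cuda.is_available() else "gloo"
    dist.init_process_group(backend=backend, init_method="env://")

# (2) Pin this process to its device.
if torch.cuda.is_available():
    local_rank = int(os.environ.get("LOCAL_RANK", 0))
    torch.cuda.set_device(local_rank)
    # Dummy all-reduce to eagerly create the NCCL communicator.
    dist.all_reduce(torch.zeros(1, device="cuda"))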
Example #8
import logging
import unittest
from collections import namedtuple

import torch
import torch.nn as nn
from classy_vision.generic.distributed_util import set_cpu_device
from parameterized import param, parameterized
from vissl.config import AttrDict
from vissl.losses.barlow_twins_loss import BarlowTwinsCriterion
from vissl.losses.cross_entropy_multiple_output_single_target import (
    CrossEntropyMultipleOutputSingleTargetCriterion,
    CrossEntropyMultipleOutputSingleTargetLoss,
)
from vissl.losses.multicrop_simclr_info_nce_loss import MultiCropSimclrInfoNCECriterion
from vissl.losses.simclr_info_nce_loss import SimclrInfoNCECriterion
from vissl.losses.swav_loss import SwAVCriterion

logger = logging.getLogger(__name__)

set_cpu_device()

BATCH_SIZE = 2048
EMBEDDING_DIM = 128
NUM_CROPS = 2
BUFFER_PARAMS_STRUCT = namedtuple(
    "BUFFER_PARAMS_STRUCT",
    ["effective_batch_size", "world_size", "embedding_dim"])
BUFFER_PARAMS = BUFFER_PARAMS_STRUCT(BATCH_SIZE, 1, EMBEDDING_DIM)


class TestLossesForward(unittest.TestCase):
    """
    Minimal testing of the losses: ensure that a forward pass with believable
    dimensions succeeds. This does not make them correct per se.
    """