Example #1
def extract_clusters(
    cfg: AttrDict,
    dist_run_id: str,
    checkpoint_folder: str,
    local_rank: int = 0,
    node_id: int = 0,
):
    """
    Sets up and executes the cluster assignment extraction workflow on one node.
    """

    # setup the environment variables
    set_env_vars(local_rank, node_id, cfg)
    dist_rank = int(os.environ["RANK"])

    # setup logging
    setup_logging(__name__, output_dir=checkpoint_folder, rank=dist_rank)

    logging.info(f"Env set for rank: {local_rank}, dist_rank: {dist_rank}")
    # print the environment info for the current node
    if local_rank == 0:
        current_env = os.environ.copy()
        print_system_env_info(current_env)

    # setup the multiprocessing to be forkserver.
    # See https://fb.quip.com/CphdAGUaM5Wf
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg, dist_rank)

    # We set the CUDA device here as well as a safe solution for all downstream
    # `torch.cuda.current_device()` calls to return correct device.
    if cfg.MACHINE.DEVICE == "gpu" and torch.cuda.is_available():
        local_rank, _ = get_machine_local_and_dist_rank()
        torch.cuda.set_device(local_rank)

    # print the training settings and system settings
    if local_rank == 0:
        print_cfg(cfg)
        logging.info("System config:\n{}".format(collect_env_info()))

    # Build the SSL trainer to set up distributed training and then
    # extract the cluster assignments for all entries in the dataset
    trainer = SelfSupervisionTrainer(cfg, dist_run_id)
    cluster_assignments = trainer.extract_clusters()

    # Save the cluster assignments in the output folder
    if dist_rank == 0:
        ClusterAssignmentLoader.save_cluster_assignment(
            output_dir=get_checkpoint_folder(cfg),
            assignments=ClusterAssignment(
                config=cfg, cluster_assignments=cluster_assignments),
        )

    # close the logging streams including the file handlers
    logging.info("All Done!")
    shutdown_logging()
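
A minimal single-node launch sketch for the entry point above, assuming cfg is the AttrDict built elsewhere from the VISSL yaml config; the rendezvous string and checkpoint folder are placeholders, and in practice tools/run_distributed_engines.py is the launcher that performs this step.

import torch
import torch.multiprocessing as mp


def _cluster_worker(local_rank: int, cfg, dist_run_id: str, checkpoint_folder: str):
    # mp.spawn passes the process index as the first argument, so map it onto local_rank.
    extract_clusters(cfg, dist_run_id, checkpoint_folder, local_rank=local_rank, node_id=0)


def launch_cluster_extraction(cfg, checkpoint_folder: str, dist_run_id: str = "tcp://localhost:40050"):
    # One process per visible GPU on this node; fall back to a single CPU process.
    nprocs = max(torch.cuda.device_count(), 1)
    mp.spawn(_cluster_worker, args=(cfg, dist_run_id, checkpoint_folder), nprocs=nprocs)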
Example #2
def get_loader(
    dataset: GenericSSLDataset,
    dataset_config: dict,
    num_dataloader_workers: int,
    pin_memory: bool,
    multi_processing_method: str,
    device: torch.device,
    sampler_seed=0,
    get_sampler=get_sampler,
    worker_init_fn=set_dataloader_seeds,
):
    """
    Get the dataloader for the given dataset and data split

    Args:
        dataset (GenericSSLDataset):    the dataset object for which dataloader is constructed
        dataset_config (dict):          configuration of the dataset.
                                        should be DATA.TRAIN or DATA.TEST settings
        num_dataloader_workers (int):   number of workers per gpu (or cpu) training
        pin_memory (bool):              whether to pin memory or not
        multi_processing_method (str):  method to use. options: forkserver | fork | spawn
        sampler_seed (int):             seed for the sampler. Should be identical across processes
        device (torch.device):          training on cuda or cpu
        get_sampler (get_sampler):      function that is used to get the sampler
        worker_init_fn (None):          any function that should be executed during
                                        initialization of dataloader workers

    Returns:
        Instance of PyTorch DataLoader. The dataloader is wrapped with
        DataloaderAsyncGPUWrapper or DataloaderSyncGPUWrapper depending
        on whether the user wants to copy data to the GPU asynchronously or not.
    """

    # pytorch dataloader requires setting the multiprocessing type.
    setup_multiprocessing_method(multi_processing_method)

    # we don't need to set the rank / replicas as the Sampler already does so in
    # its __init__ function
    data_sampler = get_sampler(dataset, dataset_config, sampler_seed)
    collate_function = get_collator(dataset_config["COLLATE_FUNCTION"],
                                    dataset_config["COLLATE_FUNCTION_PARAMS"])

    # Replace the worker_init_fn with a deterministic one when debugging
    if dataset_config["USE_DEBUGGING_SAMPLER"]:
        worker_init_fn = debugging_worker_init_fn

    # Create the pytorch dataloader
    dataloader = DataLoader(
        dataset=dataset,
        num_workers=num_dataloader_workers,
        pin_memory=pin_memory,
        shuffle=False,
        batch_size=dataset_config["BATCHSIZE_PER_REPLICA"],
        collate_fn=collate_function,
        sampler=data_sampler,
        drop_last=dataset_config["DROP_LAST"],
        worker_init_fn=worker_init_fn,
    )

    # If the targeted device is CUDA, set up async device copy:
    # - makes sure that samples are on device
    # - overlap the copy with the previous batch computation.
    if device.type == "cuda":
        if dataset.cfg["DATA"]["ENABLE_ASYNC_GPU_COPY"]:
            logging.info(
                "Wrapping the dataloader to async device copies")  # NOQA
            dataloader = DataloaderAsyncGPUWrapper(dataloader)
        else:
            logging.info(
                "Wrapping the dataloader to synchronous device copies")  # NOQA
            dataloader = DataloaderSyncGPUWrapper(dataloader)

    else:
        logging.warning("Selecting a CPU device")

    return dataloader
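
A hedged usage sketch for get_loader: the dictionary below only illustrates the dataset_config keys the function reads (the collator name, batch size, and other values are assumptions, not library defaults), and the dataset argument is assumed to be an already-built GenericSSLDataset.

import torch

# Example dataset_config mirroring the keys read above (DATA.TRAIN-style settings).
EXAMPLE_TRAIN_CONFIG = {
    "COLLATE_FUNCTION": "default_collate",   # assumed collator name registered with get_collator
    "COLLATE_FUNCTION_PARAMS": {},
    "USE_DEBUGGING_SAMPLER": False,
    "BATCHSIZE_PER_REPLICA": 32,
    "DROP_LAST": True,
}


def build_example_loader(train_dataset):
    # train_dataset: an already-constructed GenericSSLDataset for the TRAIN split.
    return get_loader(
        dataset=train_dataset,
        dataset_config=EXAMPLE_TRAIN_CONFIG,
        num_dataloader_workers=4,
        pin_memory=True,
        multi_processing_method="forkserver",
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        sampler_seed=0,
    )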
Example #3
def extract_main(
    cfg: AttrDict,
    dist_run_id: str,
    checkpoint_folder: str,
    local_rank: int = 0,
    node_id: int = 0,
):
    """
    Sets up and executes feature extraction workflow per machine.

    Args:
        cfg (AttrDict): user specified input config that has optimizer, loss, meters etc
                        settings relevant to the training
        dist_run_id (str): For multi-gpu training with PyTorch, we have to specify
                           how the gpus are going to rendezvous. This requires specifying
                           the communication method: file, tcp and the unique rendezvous
                           run_id that is specific to 1 run.
                           We recommend:
                                1) for 1node: use init_method=tcp and run_id=auto
                                2) for multi-node, use init_method=tcp and specify
                                run_id={master_node}:{port}
        local_rank (int): id of the current device on the machine. If using gpus,
                        local_rank = gpu number on the current machine
        node_id (int): id of the current machine. starts from 0. valid for multi-gpu
    """

    # setup the environment variables
    set_env_vars(local_rank, node_id, cfg)
    dist_rank = int(os.environ["RANK"])

    # setup logging
    setup_logging(__name__, output_dir=checkpoint_folder, rank=dist_rank)

    logging.info(f"Env set for rank: {local_rank}, dist_rank: {dist_rank}")
    # print the environment info for the current node
    if local_rank == 0:
        current_env = os.environ.copy()
        print_system_env_info(current_env)

    # setup the multiprocessing to be forkserver.
    # See https://fb.quip.com/CphdAGUaM5Wf
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg, dist_rank)

    # We set the CUDA device here as well as a safe solution for all downstream
    # `torch.cuda.current_device()` calls to return correct device.
    if cfg.MACHINE.DEVICE == "gpu" and torch.cuda.is_available():
        local_rank, _ = get_machine_local_and_dist_rank()
        torch.cuda.set_device(local_rank)

    # print the training settings and system settings
    if local_rank == 0:
        print_cfg(cfg)
        logging.info("System config:\n{}".format(collect_env_info()))

    trainer = SelfSupervisionTrainer(cfg, dist_run_id)
    features = trainer.extract()

    for split in features.keys():
        logging.info(f"============== Split: {split} =======================")
        for layer_name, layer_features in features[split].items():
            out_feat_file = os.path.join(
                checkpoint_folder,
                f"rank{dist_rank}_{split}_{layer_name}_features.npy")
            out_target_file = os.path.join(
                checkpoint_folder,
                f"rank{dist_rank}_{split}_{layer_name}_targets.npy")
            out_inds_file = os.path.join(
                checkpoint_folder,
                f"rank{dist_rank}_{split}_{layer_name}_inds.npy")
            feat_shape = layer_features["features"].shape
            logging.info(
                f"Saving extracted features of {layer_name} with shape {feat_shape} to: {out_feat_file}"
            )
            save_file(layer_features["features"], out_feat_file)
            logging.info(
                f"Saving extracted targets of {layer_name} to: {out_target_file}"
            )
            save_file(layer_features["targets"], out_target_file)
            logging.info(
                f"Saving extracted indices of {layer_name} to: {out_inds_file}"
            )
            save_file(layer_features["inds"], out_inds_file)

    logging.info("All Done!")
    # close the logging streams including the file handlers
    shutdown_logging()
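
The per-rank files written above follow a fixed naming pattern, so one rank's shard can be read back with plain numpy. A hedged sketch, assuming save_file writes standard .npy arrays; the split and layer names are placeholders.

import os

import numpy as np


def load_rank_shard(checkpoint_folder: str, rank: int, split: str, layer: str):
    # Returns (features, targets, inds) for one rank's shard of one split / layer.
    prefix = os.path.join(checkpoint_folder, f"rank{rank}_{split}_{layer}")
    features = np.load(f"{prefix}_features.npy")
    targets = np.load(f"{prefix}_targets.npy")
    inds = np.load(f"{prefix}_inds.npy")
    return features, targets, inds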
Example #4
def extract_label_predictions_main(
    cfg: AttrDict,
    dist_run_id: str,
    checkpoint_folder: str,
    local_rank: int = 0,
    node_id: int = 0,
):
    """
    Sets up and executes the label prediction workflow per machine. Runs the
    model in eval mode only, to extract the predicted labels.

    Args:
        cfg (AttrDict): user specified input config that has optimizer, loss, meters etc
                        settings relevant for the feature extraction.
        dist_run_id (str): For multi-gpu training with PyTorch, we have to specify
                           how the gpus are going to rendezvous. This requires specifying
                           the communication method: file, tcp and the unique rendezvous
                           run_id that is specific to 1 run.
                           We recommend:
                                1) for 1node: use init_method=tcp and run_id=auto
                                2) for multi-node, use init_method=tcp and specify
                                run_id={master_node}:{port}
        local_rank (int): id of the current device on the machine. If using gpus,
                        local_rank = gpu number on the current machine
        node_id (int): id of the current machine. starts from 0. valid for multi-gpu
    """

    # setup the environment variables
    set_env_vars(local_rank, node_id, cfg)
    dist_rank = int(os.environ["RANK"])

    # setup logging
    setup_logging(__name__, output_dir=checkpoint_folder, rank=dist_rank)

    # setup the multiprocessing to be forkserver. See https://fb.quip.com/CphdAGUaM5Wf
    logging.info(
        f"Setting multiprocessing method: {cfg.MULTI_PROCESSING_METHOD}")
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg, dist_rank)

    # We set the CUDA device here as well as a safe solution for all downstream
    # `torch.cuda.current_device()` calls to return correct device.
    if cfg.MACHINE.DEVICE == "gpu" and torch.cuda.is_available():
        local_rank, _ = get_machine_local_and_dist_rank()
        torch.cuda.set_device(local_rank)

    # print the training settings and system settings
    # print the environment info for the current node
    logging.info(f"Env set for rank: {local_rank}, dist_rank: {dist_rank}")
    if local_rank == 0:
        current_env = os.environ.copy()
        print_system_env_info(current_env)
        print_cfg(cfg)
        logging.info(f"System config:\n{collect_env_info()}")

    # Identify the hooks to run for the extract label engine
    # TODO - we need to plug this better with the engine registry
    #  - we either need to use the global hooks registry
    #  - or we need to create specific hook registry by engine
    hooks = extract_label_hook_generator(cfg)

    trainer = SelfSupervisionTrainer(cfg, dist_run_id, hooks=hooks)
    trainer.extract(
        output_folder=cfg.EXTRACT_FEATURES.OUTPUT_DIR or checkpoint_folder,
        extract_features=False,
        extract_predictions=True,
    )

    logging.info("All Done!")
    # close the logging streams including the file handlers
    shutdown_logging()
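
A hedged single-process invocation sketch for the entry point above (one GPU or CPU on node 0); the rendezvous string is a placeholder, and the predictions are written to cfg.EXTRACT_FEATURES.OUTPUT_DIR, falling back to the checkpoint folder as the code shows.

def run_label_prediction(cfg, checkpoint_folder: str):
    # Single node, single device: local_rank and node_id both stay 0.
    extract_label_predictions_main(
        cfg=cfg,
        dist_run_id="tcp://localhost:40050",   # placeholder TCP rendezvous for one node
        checkpoint_folder=checkpoint_folder,
        local_rank=0,
        node_id=0,
    )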
Example #5
def extract_main(cfg: AttrDict,
                 dist_run_id: str,
                 local_rank: int = 0,
                 node_id: int = 0):
    """
    Sets up and executes feature extraction workflow per machine.

    Args:
        cfg (AttrDict): user specified input config that has optimizer, loss, meters etc
                        settings relevant to the training
        dist_run_id (str): For multi-gpu training with PyTorch, we have to specify
                           how the gpus are going to rendezvous. This requires specifying
                           the communication method: file, tcp and the unique rendezvous
                           run_id that is specific to 1 run.
                           We recommend:
                                1) for 1node: use init_method=tcp and run_id=auto
                                2) for multi-node, use init_method=tcp and specify
                                run_id={master_node}:{port}
        local_rank (int): id of the current device on the machine. If using gpus,
                        local_rank = gpu number on the current machine
        node_id (int): id of the current machine. starts from 0. valid for multi-gpu
    """

    # setup logging
    setup_logging(__name__)
    # setup the environment variables
    set_env_vars(local_rank, node_id, cfg)

    # setup the multiprocessing to be forkserver.
    # See https://fb.quip.com/CphdAGUaM5Wf
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg)

    # print the training settings and system settings
    local_rank, _ = get_machine_local_and_dist_rank()
    if local_rank == 0:
        print_cfg(cfg)
        logging.info("System config:\n{}".format(collect_env_info()))

    output_dir = get_checkpoint_folder(cfg)
    trainer = SelfSupervisionTrainer(cfg, dist_run_id)
    features = trainer.extract()

    for split in features.keys():
        logging.info(f"============== Split: {split} =======================")
        layers = features[split].keys()
        for layer in layers:
            out_feat_file = (
                f"{output_dir}/rank{local_rank}_{split}_{layer}_features.npy")
            out_target_file = (
                f"{output_dir}/rank{local_rank}_{split}_{layer}_targets.npy")
            out_inds_file = f"{output_dir}/rank{local_rank}_{split}_{layer}_inds.npy"
            logging.info("Saving extracted features: {} {} to: {}".format(
                layer, features[split][layer]["features"].shape,
                out_feat_file))
            save_file(features[split][layer]["features"], out_feat_file)
            logging.info("Saving extracted targets: {} to: {}".format(
                features[split][layer]["targets"].shape, out_target_file))
            save_file(features[split][layer]["targets"], out_target_file)
            logging.info("Saving extracted indices: {} to: {}".format(
                features[split][layer]["inds"].shape, out_inds_file))
            save_file(features[split][layer]["inds"], out_inds_file)
    logging.info("All Done!")
    # close the logging streams including the file handlers
    shutdown_logging()
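
Because every rank writes its own rank{N}_* shard, downstream evaluation usually needs the shards merged back into dataset order. ExtractedFeaturesLoader (used in Example #8) does this inside the library; the numpy sketch below only illustrates the idea and assumes every dataset index appears in the shards.

import glob

import numpy as np


def merge_feature_shards(output_dir: str, split: str, layer: str) -> np.ndarray:
    feat_files = sorted(glob.glob(f"{output_dir}/rank*_{split}_{layer}_features.npy"))
    ind_files = sorted(glob.glob(f"{output_dir}/rank*_{split}_{layer}_inds.npy"))
    feats = np.concatenate([np.load(f) for f in feat_files], axis=0)
    inds = np.concatenate([np.load(f) for f in ind_files], axis=0)
    merged = np.zeros_like(feats)
    merged[inds] = feats  # row i of the merged array holds the feature of dataset index i
    return merged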
Example #6
def build_dataloader(
    dataset: GenericSSLDataset,
    dataset_config: dict,
    num_dataloader_workers: int,
    pin_memory: bool,
    multi_processing_method: str,
    device: torch.device,
    sampler_seed=0,
    get_sampler=get_sampler,
    worker_init_fn=set_dataloader_seeds,
    **kwargs,
):
    """
    Get the dataloader for the given dataset and data split

    Args:
        dataset (GenericSSLDataset):    the dataset object for which dataloader is constructed
        dataset_config (dict):          configuration of the dataset.
                                        should be DATA.TRAIN or DATA.TEST settings
        num_dataloader_workers (int):   number of workers per gpu (or cpu) training
        pin_memory (bool):              whether to pin memory or not
        multi_processing_method (str):  method to use. options: forkserver | fork | spawn
        sampler_seed (int):             seed for the sampler. Should be identical across processes
        device (torch.device):          training on cuda or cpu
        get_sampler (get_sampler):      function that is used to get the sampler
        worker_init_fn (None):          any function that should be executed during
                                        initialization of dataloader workers

    Returns:
        Instance of PyTorch DataLoader. The dataloader is wrapped with
        DataloaderAsyncGPUWrapper or DataloaderSyncGPUWrapper depending
        on whether the user wants to copy data to the GPU asynchronously or not.
    """

    # pytorch dataloader requires setting the multiprocessing type.
    setup_multiprocessing_method(multi_processing_method)

    # we don't need to set the rank / replicas as the Sampler already does so in
    # its __init__ function
    data_sampler = get_sampler(dataset, dataset_config, sampler_seed)
    collate_function = get_collator(dataset_config["COLLATE_FUNCTION"],
                                    dataset_config["COLLATE_FUNCTION_PARAMS"])

    # Replace the worker_init_fn with a deterministic one when debugging
    if dataset_config["USE_DEBUGGING_SAMPLER"]:
        worker_init_fn = debugging_worker_init_fn

    # Load the labels of the dataset before creating the data loader
    # or else the load of files will happen on each data loader separately
    # decreasing performance / hitting quota on data source
    dataset.load_labels()

    # Create the pytorch dataloader
    dataloader = DataLoader(
        dataset=dataset,
        num_workers=num_dataloader_workers,
        pin_memory=pin_memory,
        shuffle=False,
        batch_size=dataset_config["BATCHSIZE_PER_REPLICA"],
        collate_fn=collate_function,
        sampler=data_sampler,
        drop_last=dataset_config["DROP_LAST"],
        worker_init_fn=worker_init_fn,
    )
    enable_async_gpu_copy = dataset.cfg["DATA"]["ENABLE_ASYNC_GPU_COPY"]
    dataloader = wrap_dataloader(dataloader, enable_async_gpu_copy, device)

    return dataloader
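
A hedged usage sketch for build_dataloader, passing a custom worker_init_fn in place of set_dataloader_seeds (whose exact behaviour is not shown here); the seeding recipe is the standard PyTorch per-worker pattern, and the dataset / config arguments are assumed to be built as in the Example #2 sketch.

import random

import numpy as np
import torch


def seed_worker(worker_id: int):
    # torch.initial_seed() already differs per dataloader worker, so derive the
    # numpy / python seeds from it for reproducible augmentations.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


def build_example_dataloader(train_dataset, train_config: dict):
    # train_config uses the same key layout shown in the Example #2 sketch.
    return build_dataloader(
        dataset=train_dataset,
        dataset_config=train_config,
        num_dataloader_workers=4,
        pin_memory=True,
        multi_processing_method="forkserver",
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        worker_init_fn=seed_worker,
    )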
Example #7
def train_main(
    cfg: AttrDict,
    dist_run_id: str,
    checkpoint_path: str,
    checkpoint_folder: str,
    local_rank: int = 0,
    node_id: int = 0,
    hook_generator: Callable[[Any], List[ClassyHook]] = default_hook_generator,
):
    """
    Sets up and executes training workflow per machine.

    Args:
        cfg (AttrDict): user specified input config that has optimizer, loss, meters etc
                        settings relevant to the training
        dist_run_id (str): For multi-gpu training with PyTorch, we have to specify
                           how the gpus are going to rendezvous. This requires specifying
                           the communication method: file, tcp and the unique rendezvous
                           run_id that is specific to 1 run.
                           We recommend:
                                1) for 1node: use init_method=tcp and run_id=auto
                                2) for multi-node, use init_method=tcp and specify
                                run_id={master_node}:{port}
        checkpoint_path (str): if the training is being resumed from a checkpoint, path to
                          the checkpoint. The tools/run_distributed_engines.py automatically
                          looks for the checkpoint in the checkpoint directory.
        checkpoint_folder (str): what directory to use for checkpointing. The
                          tools/run_distributed_engines.py creates the directory based on user
                          input in the yaml config file.
        local_rank (int): id of the current device on the machine. If using gpus,
                        local_rank = gpu number on the current machine
        node_id (int): id of the current machine. starts from 0. valid for multi-gpu
        hook_generator (Callable): The utility function that prepares all the hooks that will
                         be used in training based on user selection. Some basic hooks are used
                         by default.
    """

    # setup the environment variables
    set_env_vars(local_rank, node_id, cfg)
    dist_rank = int(os.environ["RANK"])

    # setup logging
    setup_logging(__name__, output_dir=checkpoint_folder, rank=dist_rank)

    logging.info(f"Env set for rank: {local_rank}, dist_rank: {dist_rank}")
    # print the environment info for the current node
    if local_rank == 0:
        current_env = os.environ.copy()
        print_system_env_info(current_env)

    # setup the multiprocessing to be forkserver.
    # See https://fb.quip.com/CphdAGUaM5Wf
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg, dist_rank)

    # We set the CUDA device here as well as a safe solution for all downstream
    # `torch.cuda.current_device()` calls to return correct device.
    if cfg.MACHINE.DEVICE == "gpu" and torch.cuda.is_available():
        local_rank, _ = get_machine_local_and_dist_rank()
        torch.cuda.set_device(local_rank)

    # print the training settings and system settings
    if local_rank == 0:
        print_cfg(cfg)
        logging.info("System config:\n{}".format(collect_env_info()))

    # get the hooks - these hooks are executed per replica
    hooks = hook_generator(cfg)

    # build the SSL trainer. The trainer first prepares a "task" object which
    # acts as a container for everything needed in a training run: datasets,
    # dataloaders, optimizers, losses, hooks, etc. The "task" also carries
    # information about both phases (train, test). The trainer then sets up
    # distributed training.
    trainer = SelfSupervisionTrainer(
        cfg, dist_run_id, checkpoint_path, checkpoint_folder, hooks
    )
    trainer.train()
    logging.info("All Done!")
    # close the logging streams including the file handlers
    shutdown_logging()
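
The hook_generator parameter lets callers extend the default per-replica hooks. A hedged sketch, assuming cfg is the usual AttrDict and leaving checkpoint_path empty when nothing is being resumed (the production launcher resolves the checkpoint path itself):

def my_hook_generator(cfg):
    # Start from VISSL's defaults and append any user-defined ClassyHook instances.
    hooks = default_hook_generator(cfg)
    # hooks.append(MyCustomHook())   # hypothetical hook the caller would define
    return hooks


def run_training(cfg, checkpoint_folder: str):
    train_main(
        cfg=cfg,
        dist_run_id="tcp://localhost:40050",   # placeholder single-node TCP rendezvous
        checkpoint_path="",                    # assumed empty when not resuming
        checkpoint_folder=checkpoint_folder,
        local_rank=0,
        node_id=0,
        hook_generator=my_hook_generator,
    )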
Example #8
def extract_features_main(
    cfg: AttrDict,
    dist_run_id: str,
    checkpoint_folder: str,
    local_rank: int = 0,
    node_id: int = 0,
):
    """
    Sets up and executes feature extraction workflow per machine.

    Args:
        cfg (AttrDict): user specified input config that has optimizer, loss, meters etc
                        settings relevant to the training
        dist_run_id (str): For multi-gpu training with PyTorch, we have to specify
                           how the gpus are going to rendezvous. This requires specifying
                           the communication method: file, tcp and the unique rendezvous
                           run_id that is specific to 1 run.
                           We recommend:
                                1) for 1node: use init_method=tcp and run_id=auto
                                2) for multi-node, use init_method=tcp and specify
                                run_id={master_node}:{port}
        checkpoint_folder (str): what directory to use for checkpointing. This folder
                                 will be used to output the extracted features as well
                                 in case config.EXTRACT_FEATURES.OUTPUT_DIR is not set
        local_rank (int): id of the current device on the machine. If using gpus,
                        local_rank = gpu number on the current machine
        node_id (int): id of the current machine. starts from 0. valid for multi-gpu
    """

    # setup the environment variables
    set_env_vars(local_rank, node_id, cfg)
    dist_rank = int(os.environ["RANK"])

    # setup logging
    setup_logging(__name__, output_dir=checkpoint_folder, rank=dist_rank)

    logging.info(f"Env set for rank: {local_rank}, dist_rank: {dist_rank}")
    # print the environment info for the current node
    if local_rank == 0:
        current_env = os.environ.copy()
        print_system_env_info(current_env)

    # setup the multiprocessing to be forkserver.
    # See https://fb.quip.com/CphdAGUaM5Wf
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg, dist_rank)

    # We set the CUDA device here as well as a safe solution for all downstream
    # `torch.cuda.current_device()` calls to return correct device.
    if cfg.MACHINE.DEVICE == "gpu" and torch.cuda.is_available():
        local_rank, _ = get_machine_local_and_dist_rank()
        torch.cuda.set_device(local_rank)

    # print the training settings and system settings
    if local_rank == 0:
        print_cfg(cfg)
        logging.info("System config:\n{}".format(collect_env_info()))

    # Identify the hooks to run for the extract features engine
    # TODO - we need to plug this better with the engine registry
    #  - we either need to use the global hooks registry
    #  - or we need to create specific hook registry by engine
    hooks = extract_features_hook_generator(cfg)

    # Run the feature extraction
    trainer = SelfSupervisionTrainer(cfg, dist_run_id, hooks=hooks)
    output_dir = cfg.EXTRACT_FEATURES.OUTPUT_DIR or checkpoint_folder
    trainer.extract(
        output_folder=output_dir,
        extract_features=True,
        extract_predictions=False,
    )

    # TODO (prigoyal): merge this function with _extract_features
    if dist_rank == 0 and cfg.EXTRACT_FEATURES.MAP_FEATURES_TO_IMG_NAME:
        # Get the names of the layers we extracted features for. If the user doesn't
        # specify the features to evaluate, we use the full model output and freeze
        # both head and trunk as a precaution.
        layers = get_trunk_output_feature_names(cfg.MODEL)
        if len(layers) == 0:
            layers = ["heads"]
        available_splits = [
            item.lower() for item in trainer.task.available_splits
        ]
        for split in available_splits:
            image_paths = trainer.task.datasets[split].get_image_paths()[0]
            for layer in layers:
                ExtractedFeaturesLoader.map_features_to_img_filepath(
                    image_paths=image_paths,
                    input_dir=output_dir,
                    split=split,
                    layer=layer,
                )

    logging.info("All Done!")
    # close the logging streams including the file handlers
    shutdown_logging()
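
When MAP_FEATURES_TO_IMG_NAME is enabled, ExtractedFeaturesLoader associates each extracted feature row with the image it came from via the saved indices. The numpy sketch below shows the per-shard idea only, assuming the saved files are standard .npy arrays and image_paths is the list returned by get_image_paths() above.

import numpy as np


def pair_features_with_paths(features_file: str, inds_file: str, image_paths):
    # Row r of the features array corresponds to image_paths[inds[r]].
    feats = np.load(features_file)
    inds = np.load(inds_file)
    return [(image_paths[int(i)], feats[row]) for row, i in enumerate(inds)]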