Example #1
    def extract(self, output_folder: str) -> None:
        """
        The extract workflow supports multi-gpu feature extraction. Since we are only
        extracting features, only the model is built (and initialized from a model
        weights file if the user specifies one). The model is fully set to eval mode.

        The features are extracted for whatever data splits (train, val, test, etc.)
        the user wants.
        """
        # support feature extraction on gpu only.
        assert self.task.device.type == "cuda", "Set MACHINE.DEVICE = gpu"
        self.task.prepare_extraction(pin_memory=self.cfg.DATA.PIN_MEMORY)

        # Create distributed model
        self._add_dummy_layer()
        self.task.init_distributed_data_parallel_model()
        if is_primary():
            logging.info("Model is:\n {}".format(self.task.model))

        # Get the names of the features that we are extracting. If the user doesn't
        # specify the features to evaluate, we get the full model output and freeze
        # both the head and trunk as a precaution.
        feat_names = get_trunk_output_feature_names(self.cfg.MODEL)
        if len(feat_names) == 0:
            feat_names = ["heads"]

        for split in self.task.available_splits:
            logging.info(f"============== Split: {split} =======================")
            logging.info(f"Extracting features for partition: {split.lower()}")
            self.task.data_iterator = iter(self.task.dataloaders[split.lower()])
            self._extract_split_features(feat_names, self.task, split, output_folder)
            logging.info(f"Done getting features for partition: {split.lower()}")

        self._cleanup_task()
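
Every example on this page leans on the same defaulting idiom around get_trunk_output_feature_names. As a reference point, below is a minimal sketch of what such a helper might look like; the FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP key is an assumption for illustration, not something taken from this listing.

from typing import List


def get_trunk_output_feature_names_sketch(model_config) -> List[str]:
    # Illustrative stand-in only. Assumption: in feature-eval mode the config
    # lists (feature_name, pool_op) pairs under
    # FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP.
    feature_names = []
    if model_config.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON:
        feat_ops_map = model_config.FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP
        feature_names = [item[0] for item in feat_ops_map]
    return feature_names

Callers then fall back to ["heads"] (the full model / head output) when the list comes back empty, which is exactly the `if len(feat_names) == 0` branch repeated in every example here.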
Example #2
def main(args: Namespace, config: AttrDict):
    # setup logging
    setup_logging(__name__)

    # print the configuration used
    print_cfg(config)

    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=config)

    # extract the features
    launch_distributed(
        config,
        args.node_id,
        engine_name="extract_features",
        hook_generator=default_hook_generator,
    )

    # Get the names of the features that we are extracting. If the user doesn't
    # specify the features to evaluate, we get the full model output and freeze
    # both the head and trunk as a precaution.
    feat_names = get_trunk_output_feature_names(config.MODEL)
    if len(feat_names) == 0:
        feat_names = ["heads"]

    for layer in feat_names:
        top1, top5 = nearest_neighbor_test(config, layer_name=layer)
        logging.info(f"layer: {layer} Top1: {top1}, Top5: {top5}")
    # close the logging streams including the filehandlers
    shutdown_logging()
Example #3
def main(args: Namespace, cfg: AttrDict):
    # setup logging
    setup_logging(__name__)

    # print the cfg
    print_cfg(cfg)

    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=cfg)

    output_dir = get_checkpoint_folder(cfg)

    assert cfg.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON, (
        "Feature eval mode is not ON. Can't run train_svm. "
        "Set config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON=True "
        "in your config or from command line.")
    extract_low_shot_features(args, cfg, output_dir)

    # Get the names of the features that we extracted. If the user doesn't
    # specify the features to evaluate, we get the full model output and freeze
    # both the head and trunk as a precaution.
    layers = get_trunk_output_feature_names(cfg.MODEL)
    if len(layers) == 0:
        layers = ["heads"]

    # train low shot svm for each layer.
    output = {}
    for layer in layers:
        results = train_svm_low_shot(cfg, output_dir, layer)
        output[layer] = results
    logging.info(f"Results: {output}")

    # close the logging streams including the filehandlers
    shutdown_logging()
Example #4
    def extract(
        self,
        output_folder: str,
        extract_features: bool = True,
        extract_predictions: bool = False,
    ) -> None:
        """
        The extract workflow supports multi-gpu extraction of features and also of
        predicted labels. Since we are only extracting features or label predictions,
        only the model is built (and initialized from a model weights file if the
        user specifies one). Optionally, the meters are built if labels are being
        extracted. The model is fully set to eval mode.

        The features / labels are extracted for whatever data splits (train, val, test)
        the user wants.
        """
        # support feature/label predictions extraction on gpu only.
        assert self.task.device.type == "cuda", "Set MACHINE.DEVICE = gpu"
        self.task.prepare_extraction(pin_memory=self.cfg.DATA.PIN_MEMORY)

        # Create distributed model
        self.task.add_dummy_layer()
        self.task.init_distributed_data_parallel_model()
        if is_primary():
            logging.info(f"Model is:\n {self.task.model}")

        # Get the names of the features that we are extracting. If the user doesn't
        # specify the features to evaluate, we get the full model output and freeze
        # both the head and trunk as a precaution.
        feat_names = get_trunk_output_feature_names(self.cfg.MODEL)
        if len(feat_names) == 0:
            feat_names = ["heads"]

        self.task.train = False
        self.task.run_hooks(SSLClassyHookFunctions.on_start.name)
        for split in self.task.available_splits:
            logging.info(
                f"============== Split: {split} =======================")
            self.task.data_iterator = iter(
                self.task.dataloaders[split.lower()])
            if extract_features:
                logging.info(
                    f"Extracting features for partition: {split.lower()}")
                self._extract_split_features(feat_names, self.task, split,
                                             output_folder)
                logging.info(
                    f"Done getting features for partition: {split.lower()}")
            if extract_predictions:
                logging.info(
                    f"Extracting predictions for partition: {split.lower()}")
                self._extract_split_label_predictions(feat_names, self.task,
                                                      split, output_folder)
                logging.info(
                    f"Done getting predictions for partition: {split.lower()}")
        self.task.run_hooks(SSLClassyHookFunctions.on_end.name)

        self._cleanup_task()
Example #5
    def __init__(self, model_config, optimizer_config):
        self.model_config = model_config
        self.optimizer_config = optimizer_config
        super().__init__()
        self.eval_mode = None  # this is just informational
        self.local_rank, _ = get_machine_local_and_dist_rank()
        self.trunk = self._get_trunk()
        self.heads = nn.ModuleList()
        self.head_names = []
        self._output_feature_names = get_trunk_output_feature_names(self.model_config)
        self._get_heads()
        self._setup_multi_input_head_mapping()
Example #6
    def extract(self):
        """
        The extract workflow supports multi-gpu feature extraction. Since we are only
        extracting features, only the model is built (and initialized from a model
        weights file if the user specifies one). The model is fully set to eval mode.

        The features are extracted for whatever data splits (train, val, test, etc.)
        the user wants.
        """
        # support feature extraction on gpu only.
        assert self.task.device.type == "cuda", "Set MACHINE.DEVICE = gpu"
        self.task.prepare_extraction(pin_memory=self.cfg.DATA.PIN_MEMORY)

        # In feature evaluation mode, if we freeze both the trunk and the head,
        # DDP won't work because the model has no parameters. Adding a dummy
        # head would make the extracted features incorrect, so instead we add a
        # dummy layer to the model and use DDP. We copy the model to gpu (if
        # using gpus) after adding the new dummy layer.
        fully_frozen_model = self.task.base_model.is_fully_frozen_model()
        if fully_frozen_model:
            self.task.base_model.dummy_layer = torch.nn.Linear(4, 4)
            if self.task.device.type == "cuda":
                self.task.base_model = copy_model_to_gpu(self.task.base_model)
        self.task.init_distributed_data_parallel_model()

        if is_primary():
            logging.info("Model is:\n {}".format(self.task.model))

        # Get the names of the features that we are extracting. If the user doesn't
        # specify the features to evaluate, we get the full model output and freeze
        # both the head and trunk as a precaution.
        feat_names = get_trunk_output_feature_names(self.cfg.MODEL)
        if len(feat_names) == 0:
            feat_names = ["heads"]

        features = {}
        for split in self.task.available_splits:
            logging.info(f"Extracting features for partition: {split.lower()}")
            self.task.data_iterator = iter(
                self.task.dataloaders[split.lower()])
            features[split.lower()] = self._get_split_features(
                feat_names, self.cfg, self.task)
            logging.info(
                f"Done getting features for partition: {split.lower()}")

        if hasattr(self.task, "data_iterator"):
            del self.task.data_iterator
            gc.collect()
        if hasattr(self.task, "dataloaders"):
            del self.task.dataloaders
            gc.collect()
        return features
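
Unlike the newer variants above, this extract() keeps the features in memory and returns them instead of writing them to an output folder. A hedged usage sketch follows; how the extractor object is constructed is not shown in this listing, so `extractor` is hypothetical.

# Hypothetical caller: `extractor` is an instance of the class defining extract().
features = extractor.extract()
# The dict is keyed by the lower-cased split name; the per-split structure is
# whatever _get_split_features produced for the requested feature names.
train_features = features["train"]
test_features = features.get("test")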
Example #7
def main(args: Namespace, config: AttrDict):
    # setup logging
    setup_logging(__name__, output_dir=get_checkpoint_folder(config))

    # print the configuration used
    print_cfg(config)

    assert config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON, (
        "Feature eval mode is not ON. Can't run train_svm. "
        "Set config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON=True "
        "in your config or from command line.")

    # extract the features
    if not config.SVM_FEATURES_PATH:
        launch_distributed(
            config,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
        config.SVM_FEATURES_PATH = get_checkpoint_folder(config)

    # Get the names of the features that we extracted. If the user doesn't
    # specify the features to evaluate, we get the full model output and freeze
    # both the head and trunk as a precaution.
    layers = get_trunk_output_feature_names(config.MODEL)
    if len(layers) == 0:
        layers = ["heads"]

    output_dir = get_checkpoint_folder(config)
    running_tasks = [
        mp.Process(target=train_svm, args=(config, output_dir, layer))
        for layer in layers
    ]
    for running_task in running_tasks:
        running_task.start()
    for running_task in running_tasks:
        running_task.join()

    # collect the mAP stats for all the layers and report
    output_mAP = []
    for layer in layers:
        try:
            ap_file = f"{output_dir}/{layer}/test_ap.npy"
            output_mAP.append(round(100.0 * np.mean(load_file(ap_file)), 3))
        except Exception:
            output_mAP.append(-1)
    logging.info(f"AP for various layers:\n {layers}: {output_mAP}")
    # close the logging streams including the filehandlers
    shutdown_logging()
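
The mAP report above assumes each layer directory contains a test_ap.npy holding per-class average precisions (that is what the np.mean call implies). A small hedged sketch for inspecting one of those files directly; the path and layer name below are placeholders.

import numpy as np

# Placeholder path following the f"{output_dir}/{layer}/test_ap.npy" pattern above.
ap = np.load("/path/to/checkpoint_dir/res5/test_ap.npy")
print(f"num classes: {ap.shape[0]}, mAP: {100.0 * float(ap.mean()):.3f}")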
Example #8
def run_knn_at_all_layers(config: AttrDict):
    """
    Get the names of the features that we are extracting. If the user doesn't
    specify the features to evaluate, we get the full model output and freeze
    both the head and trunk as a precaution.
    """
    feat_names = get_trunk_output_feature_names(config.MODEL)
    if len(feat_names) == 0:
        feat_names = ["heads"]

    for layer in feat_names:
        if config.NEAREST_NEIGHBOR.OPTIMIZE_MEMORY:
            top1, top5, _ = run_knn_at_layer_low_memory(config, layer_name=layer)
        else:
            top1, top5, _ = run_knn_at_layer(config, layer_name=layer)
        logging.info(f"layer: {layer} Top1: {top1}, Top5: {top5}")
Example #9
def extract_features_main(
    cfg: AttrDict,
    dist_run_id: str,
    checkpoint_folder: str,
    local_rank: int = 0,
    node_id: int = 0,
):
    """
    Sets up and executes feature extraction workflow per machine.

    Args:
        cfg (AttrDict): user specified input config that has optimizer, loss, meters etc
                        settings relevant to the training
        dist_run_id (str): For multi-gpu training with PyTorch, we have to specify
                           how the gpus are going to rendezvous. This requires specifying
                           the communication method: file, tcp and the unique rendezvous
                           run_id that is specific to 1 run.
                           We recommend:
                                1) for 1 node: use init_method=tcp and run_id=auto
                                2) for multi-node: use init_method=tcp and specify
                                   run_id={master_node}:{port}
        checkpoint_folder (str): what directory to use for checkpointing. This folder
                                 will be used to output the extracted features as well
                                 in case config.EXTRACT_FEATURES.OUTPUT_DIR is not set
        local_rank (int): id of the current device on the machine. If using gpus,
                        local_rank = gpu number on the current machine
        node_id (int): id of the current machine. starts from 0. valid for multi-gpu
    """

    # setup the environment variables
    set_env_vars(local_rank, node_id, cfg)
    dist_rank = int(os.environ["RANK"])

    # setup logging
    setup_logging(__name__, output_dir=checkpoint_folder, rank=dist_rank)

    logging.info(f"Env set for rank: {local_rank}, dist_rank: {dist_rank}")
    # print the environment info for the current node
    if local_rank == 0:
        current_env = os.environ.copy()
        print_system_env_info(current_env)

    # set up the multiprocessing method specified in the config (e.g. forkserver).
    # See https://fb.quip.com/CphdAGUaM5Wf
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg, dist_rank)

    # We set the CUDA device here as well as a safe solution for all downstream
    # `torch.cuda.current_device()` calls to return correct device.
    if cfg.MACHINE.DEVICE == "gpu" and torch.cuda.is_available():
        local_rank, _ = get_machine_local_and_dist_rank()
        torch.cuda.set_device(local_rank)

    # print the training settings and system settings
    if local_rank == 0:
        print_cfg(cfg)
        logging.info("System config:\n{}".format(collect_env_info()))

    # Identify the hooks to run for the extract label engine
    # TODO - we need to plug this better with the engine registry
    #  - we either need to use the global hooks registry
    #  - or we need to create specific hook registry by engine
    hooks = extract_features_hook_generator(cfg)

    # Run the label prediction extraction
    trainer = SelfSupervisionTrainer(cfg, dist_run_id, hooks=hooks)
    output_dir = cfg.EXTRACT_FEATURES.OUTPUT_DIR or checkpoint_folder
    trainer.extract(
        output_folder=output_dir,
        extract_features=True,
        extract_predictions=False,
    )

    # TODO (prigoyal): merge this function with _extract_features
    if dist_rank == 0 and cfg.EXTRACT_FEATURES.MAP_FEATURES_TO_IMG_NAME:
        # Get the names of the features that we extracted. If the user doesn't
        # specify the features to evaluate, we get the full model output and freeze
        # both the head and trunk as a precaution.
        layers = get_trunk_output_feature_names(cfg.MODEL)
        if len(layers) == 0:
            layers = ["heads"]
        available_splits = [
            item.lower() for item in trainer.task.available_splits
        ]
        for split in available_splits:
            image_paths = trainer.task.datasets[split].get_image_paths()[0]
            for layer in layers:
                ExtractedFeaturesLoader.map_features_to_img_filepath(
                    image_paths=image_paths,
                    input_dir=output_dir,
                    split=split,
                    layer=layer,
                )

    logging.info("All Done!")
    # close the logging streams including the filehandlers
    shutdown_logging()
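
The dist_run_id recommendation in the docstring above is easiest to see from a launcher. Below is a minimal hedged sketch of a single-node launcher that spawns one worker per GPU and calls extract_features_main; the tcp address, port, and spawning details are assumptions, not part of this listing.

import torch
import torch.multiprocessing as mp


def _extract_worker(local_rank, cfg, dist_run_id, checkpoint_folder):
    # mp.spawn passes the process index first; we use it as the local gpu rank.
    extract_features_main(
        cfg,
        dist_run_id=dist_run_id,
        checkpoint_folder=checkpoint_folder,
        local_rank=local_rank,
        node_id=0,  # single-node sketch
    )


def launch_extraction_single_node(cfg, checkpoint_folder):
    # For 1 node the docstring recommends init_method=tcp; a localhost address
    # with a free port stands in for run_id here (assumption). Assumes at
    # least one visible GPU, since the extract engine requires cuda.
    dist_run_id = "localhost:40050"
    num_gpus = torch.cuda.device_count()
    mp.spawn(
        _extract_worker,
        args=(cfg, dist_run_id, checkpoint_folder),
        nprocs=num_gpus,
    )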