Example #1
def load_checkpoint(
    checkpoint_folder, device=CPU_DEVICE, checkpoint_file=CHECKPOINT_FILE
):
    """
    Loads a state variable from the specified checkpoint folder.
    """
    if not checkpoint_folder:
        return None

    assert device is not None, "Please specify what device to load checkpoint on"
    assert device.type in ["cpu", "cuda"], f"Unknown device: {device}"
    if device.type == "cuda":
        assert torch.cuda.is_available()

    if not PathManager.exists(checkpoint_folder):
        logging.warning("Checkpoint folder '%s' not found" % checkpoint_folder)
        return None
    logging.info("Attempting to load checkpoint from '%s'" % checkpoint_folder)

    # read what the latest model file is:
    filename = f"{checkpoint_folder}/{checkpoint_file}"
    if not PathManager.exists(filename):
        logging.warning("Checkpoint file %s not found." % filename)
        return None

    # load the model on the specified device (not the device it was saved on)
    # and return the checkpoint
    with PathManager.open(filename, "rb") as f:
        checkpoint = torch.load(f, map_location=device)
    logging.info(f"Loaded checkpoint from {filename}")
    return checkpoint
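A minimal usage sketch for load_checkpoint above, assuming the function and its PathManager/torch dependencies are importable; the checkpoint folder path below is hypothetical.

import torch

# Hypothetical checkpoint location; adjust to your experiment directory.
checkpoint_folder = "/tmp/my_experiment/checkpoints"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoint = load_checkpoint(checkpoint_folder, device=device)
if checkpoint is None:
    print("No checkpoint found; starting from scratch.")
else:
    print(f"Loaded checkpoint with keys: {list(checkpoint.keys())}")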
Example #2
def main():
    parser = argparse.ArgumentParser(
        description="Create the iNaturalist2018 data information file."
    )
    parser.add_argument(
        "-i",
        "--input_dir_path",
        type=str,
        help="Path to the parent directory of the iNaturalist2018 data set",
    )
    parser.add_argument(
        "-o",
        "--output_dir_path",
        type=str,
        help="Folder where the classification dataset will be written",
    )
    parser.add_argument(
        "-d",
        "--download",
        action="store_const",
        const=True,
        default=False,
        help="To download the original dataset and decompress it in the input folder",
    )
    args = parser.parse_args()

    # Make sure that the input and output directories exist.
    assert PathManager.exists(
        args.input_dir_path
    ), "Data input directory not found! Please create the directory"
    assert PathManager.exists(
        args.output_dir_path
    ), "Data output directory not found! Please create the directory"

    # Download dataset to input path
    if args.download:
        download_dataset(args.input_dir_path)

    # Process training and validation datasets into numpy arrays
    logger.info("========Preparing train data files========")
    train_images, train_labels = get_images_labels_info(
        "/train2018.json", args.input_dir_path
    )
    logger.info("========Preparing val data files========")
    val_images, val_labels = get_images_labels_info(
        "/val2018.json", args.input_dir_path
    )

    # Save as numpy files to output path
    logger.info("=================Saving train data files=======================")
    train_label_file_name = f"{args.output_dir_path}/train_labels.npy"
    train_image_file_name = f"{args.output_dir_path}/train_images.npy"
    save_file(train_images, train_image_file_name)
    save_file(train_labels, train_label_file_name)

    logger.info("=================Saving val data files=======================")
    val_label_file_name = f"{args.output_dir_path}/val_labels.npy"
    val_image_file_name = f"{args.output_dir_path}/val_images.npy"
    save_file(val_images, val_image_file_name)
    save_file(val_labels, val_label_file_name)
Example #3
    def test_bad_args(self) -> None:
        with self.assertRaises(NotImplementedError):
            PathManager.copy(
                self._remote_uri,
                self._remote_uri,
                foo="foo"  # type: ignore
            )
        with self.assertRaises(NotImplementedError):
            PathManager.exists(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.get_local_path(
                self._remote_uri,
                foo="foo"  # type: ignore
            )
        with self.assertRaises(NotImplementedError):
            PathManager.isdir(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.isfile(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.ls(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.mkdirs(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(ValueError):
            PathManager.open(self._remote_uri, foo="foo")  # type: ignore
        with self.assertRaises(NotImplementedError):
            PathManager.rm(self._remote_uri, foo="foo")  # type: ignore

        PathManager.set_strict_kwargs_checking(False)

        PathManager.get_local_path(self._remote_uri, foo="foo")  # type: ignore
        f = PathManager.open(self._remote_uri, foo="foo")  # type: ignore
        f.close()
        PathManager.set_strict_kwargs_checking(True)
Example #4
    def _build_model(self):
        """
        - Builds and returns the model used for the task. The returned model is not
          yet copied to GPU (if a GPU is used) nor wrapped with DDP; this is done
          later by self.prepare().

        - We also convert the model's BatchNorm layers to SyncBatchNorm if the user
          has set the config option. Both PyTorch and Apex SyncBatchNorm are
          supported.

        - If the model is set to be in evaluation mode and the full model must be
          frozen, we freeze the model.

        - If the model must be initialized from a checkpoint or a user-provided
          weights file, we initialize the model from the checkpoint or the weights.
        """
        logging.info("Building model....")

        # Instantiate the raw model as specified
        model = build_model(self.config["MODEL"], self.config["OPTIMIZER"])

        # Convert the BatchNorm layers to SyncBatchNorm if needed
        # Both Apex and Pytorch SyncBatchNorms are GPU only
        if (self.config["MODEL"]["SYNC_BN_CONFIG"]["CONVERT_BN_TO_SYNC_BN"]
                and self.config["MACHINE"]["DEVICE"] == "gpu"):
            model = convert_sync_bn(self.config, model)

        # Enforce eval mode, no matter what the prior transforms have done.
        # For instance, apex converts batch-norms and sets `requires_grad` to True.
        if self.config["MODEL"]["FEATURE_EVAL_SETTINGS"]["EVAL_MODE_ON"]:
            if self.config["MODEL"]["FEATURE_EVAL_SETTINGS"][
                    "FREEZE_TRUNK_ONLY"]:
                logging.info(
                    "config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_ONLY=True, "
                    "will freeze trunk...")
                model.freeze_trunk()
            elif self.config["MODEL"]["FEATURE_EVAL_SETTINGS"][
                    "FREEZE_TRUNK_AND_HEAD"]:
                logging.info(
                    "config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_AND_HEAD=True, will "
                    "freeze trunk and head...")
                model.freeze_head_and_trunk()

        # assert that if the user set the PARAMS_FILE, it must exist and be valid.
        if (self.checkpoint_path is None
                and self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"]):
            assert PathManager.exists(
                self.config["MODEL"]["WEIGHTS_INIT"]
                ["PARAMS_FILE"]), "Specified PARAMS_FILE does NOT exist"
        # If we want to initialize the model for finetuning or evaluation, we do it
        # here. But we first check that no checkpoint already exists; this is
        # important in cases where the model training dies.
        if (self.checkpoint_path is None
                and self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"]
                and PathManager.exists(
                    self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"])):
            model = self._restore_model_weights(model)

        return model
Example #5
def check_data_exists(data_files):
    """
    Check that the input data files exist. If data_files is a list,
    we check each file in the list.
    """
    if isinstance(data_files, list):
        return np.all([PathManager.exists(item) for item in data_files])
    else:
        return PathManager.exists(data_files)
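A short usage sketch for check_data_exists, assuming it and PathManager are importable; the paths are hypothetical.

# A single path returns a plain bool; a list returns True only if every file exists.
print(check_data_exists("/datasets/train_images.npy"))
print(check_data_exists(["/datasets/train_images.npy", "/datasets/train_labels.npy"]))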
Example #6
 def test_rm(self):
     with open(os.path.join(self._tmpdir, "test_rm.txt"), "w") as f:
         rm_file = f.name
         f.write(self._tmpfile_contents)
         f.flush()
     self.assertTrue(PathManager.exists(rm_file))
     self.assertTrue(PathManager.isfile(rm_file))
     PathManager.rm(rm_file)
     self.assertFalse(PathManager.exists(rm_file))
     self.assertFalse(PathManager.isfile(rm_file))
Example #7
 def on_start(self, task) -> None:
     if not is_primary() or getattr(task, "test_only", False):
         return
     if not PathManager.exists(self.torchscript_folder):
         err_msg = "Torchscript folder '{}' does not exist.".format(
             self.torchscript_folder)
         raise FileNotFoundError(err_msg)
Example #8
def get_images_labels_info(split, args):
    assert PathManager.exists(
        args.data_source_dir), "Data source NOT found. Abort!"
    data_dir = f"{args.data_source_dir}/{split}"
    class_idx = get_all_classes(data_dir)
    logger.info("Number of classes in {} data: {}".format(
        split, len(class_idx)))
    all_classes = class_idx.keys()
    image_paths, image_classes, img_ids = [], [], []
    for class_name in all_classes:
        class_label = class_idx[class_name]
        class_dir = f"{data_dir}/{class_name}"
        # get all the images in this dir
        for item in os.listdir(class_dir):
            if item not in [".", ".."]:
                image_paths.append(f"{class_dir}/{item}")
                img_ids.append(f"{class_name}/{item}")
                image_classes.append(class_label)
    output_dict = {}
    if args.generate_json:
        for idx in range(len(img_ids)):
            id = img_ids[idx]
            lbl = image_classes[idx]
            output_dict[id] = lbl
    return image_paths, image_classes, output_dict
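A small follow-up sketch showing one way the returned output_dict could be written out when generate_json is set; args stands for the parsed command-line arguments of the surrounding script, and the output path is hypothetical.

import json

image_paths, image_classes, output_dict = get_images_labels_info("train", args)
if args.generate_json:
    # Hypothetical output location for the image-id -> label mapping.
    with open("/tmp/train_targets.json", "w") as f:
        json.dump(output_dict, f)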
Example #9
def get_eval_dataset(cfg, root_dataset_path, eval_dataset_name, eval_binary_path):
    eval_data_path = f"{root_dataset_path}/{eval_dataset_name}"
    assert PathManager.exists(eval_data_path), f"Unknown path: {eval_data_path}"

    num_samples = (
        None
        if cfg.IMG_RETRIEVAL.NUM_DATABASE_SAMPLES == -1
        else cfg.IMG_RETRIEVAL.NUM_DATABASE_SAMPLES
    )

    if is_revisited_dataset(eval_dataset_name):
        eval_dataset = RevisitedInstanceRetrievalDataset(
            eval_dataset_name, root_dataset_path, num_samples=num_samples
        )
    elif is_instre_dataset(eval_dataset_name):
        eval_dataset = InstreDataset(eval_data_path, num_samples=num_samples)
    elif is_copdays_dataset(eval_dataset_name):
        eval_dataset = CopyDaysDataset(
            data_path=eval_data_path,
            num_samples=num_samples,
            use_distractors=cfg.IMG_RETRIEVAL.USE_DISTRACTORS,
        )
    else:
        eval_dataset = InstanceRetrievalDataset(
            eval_data_path, eval_binary_path, num_samples=num_samples
        )
    return eval_dataset
Example #10
    def step(self, iteration: int, **kwargs: Any) -> None:
        """
        Perform the appropriate action at the given iteration.

        Args:
            iteration (int): the current iteration, ranged in [0, max_iter-1].
            kwargs (Any): extra data to save, same as in
                :meth:`Checkpointer.save`.
        """
        iteration = int(iteration)
        additional_state = {"iteration": iteration}
        additional_state.update(kwargs)
        if (iteration + 1) % self.period == 0:
            self.checkpointer.save("model_{:07d}".format(iteration), **additional_state)

            if self.max_to_keep is not None:
                self.recent_checkpoints.append(self.checkpointer.get_checkpoint_file())
                # pyre-fixme[6]: Expected `int` for 1st param but got `Optional[int]`.
                if len(self.recent_checkpoints) > self.max_to_keep:
                    file_to_delete = self.recent_checkpoints.pop(0)
                    if PathManager.exists(
                        file_to_delete
                    ) and not file_to_delete.endswith("model_final.pth"):
                        PathManager.rm(file_to_delete)

        if iteration >= self.max_iter - 1:  # pyre-ignore
            self.checkpointer.save("model_final", **additional_state)
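A minimal sketch of how a step() method like the one above is typically driven from a training loop; periodic_checkpointer, max_iter, and train_one_iteration are assumed names, not part of the example.

# Assumed: periodic_checkpointer exposes the step() shown above and
# train_one_iteration() runs one optimization step and returns the loss.
max_iter = 10000
for iteration in range(max_iter):
    loss_value = train_one_iteration()
    # Extra keyword arguments are forwarded to Checkpointer.save as additional state.
    periodic_checkpointer.step(iteration, loss=loss_value)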
Example #11
def build_retrieval_model(cfg):
    """
    Builds the model on a single GPU and initializes it from the weights file.
    """
    logging.info("Building model....")
    model = build_model(cfg.MODEL, cfg.OPTIMIZER)
    if PathManager.exists(cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE):
        init_weights_path = cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE
        logging.info(f"Initializing model from: {init_weights_path}")
        weights = torch.load(init_weights_path,
                             map_location=torch.device("cuda"))
        skip_layers = cfg.MODEL.WEIGHTS_INIT.get("SKIP_LAYERS", [])
        replace_prefix = cfg.MODEL.WEIGHTS_INIT.get("REMOVE_PREFIX", None)
        append_prefix = cfg.MODEL.WEIGHTS_INIT.get("APPEND_PREFIX", None)
        state_dict_key_name = cfg.MODEL.WEIGHTS_INIT.get(
            "STATE_DICT_KEY_NAME", None)

        init_model_from_consolidated_weights(
            cfg,
            model,
            weights,
            state_dict_key_name=state_dict_key_name,
            skip_layers=skip_layers,
            replace_prefix=replace_prefix,
            append_prefix=append_prefix,
        )
    else:
        # We only log a warning if no weights file is provided. We also want to
        # benchmark randomly initialized models, hence this case is supported.
        logging.warning("Model is randomly initialized....")
    logging.info(f"Model is:\n {model}")
    return model
Example #12
 def load_input_data(self, data_file, targets_file):
     """
     Given the input data (features) and targets (labels) files, load the
     features of shape N x D and labels of shape (N,)
     """
     assert PathManager.exists(data_file), "Data file not found. Abort!"
     assert PathManager.exists(
         targets_file), "Targets file not found. Abort!"
     # load the features and the targets
     logging.info("loading features and targets...")
     targets = load_file(targets_file)
     features = np.array(load_file(data_file)).astype(np.float64)
     assert features.shape[0] == targets.shape[0], "Mismatched #images"
     logging.info(
         f"Loaded features: {features.shape} and targets: {targets.shape}")
     return features, targets
Example #13
    def step(self, iteration: int, **kwargs: Any):
        """
        Perform the appropriate action at the given iteration.

        Args:
            iteration (int): the current iteration, ranged in [0, max_iter-1].
            kwargs (Any): extra data to save, same as in
                :meth:`Checkpointer.save`.
        """
        iteration = int(iteration)
        additional_state = {"iteration": iteration}
        additional_state.update(kwargs)
        if (iteration + 1) % self.period == 0:
            self.checkpointer.save("model_{:07d}".format(iteration),
                                   **additional_state)

            if self.max_to_keep is not None:
                all_checkpoint_files = (
                    self.checkpointer.get_all_checkpoint_files())
                all_checkpoint_files = [
                    item for item in all_checkpoint_files
                    if not item.endswith("model_final.pth")
                ]
                all_checkpoint_files.sort()
                files_to_delete = all_checkpoint_files[:-self.max_to_keep]

                for file in files_to_delete:
                    if PathManager.exists(file):
                        PathManager.rm(file)

        if iteration >= self.max_iter - 1:
            self.checkpointer.save("model_final", **additional_state)
Example #14
 def on_start(self, task) -> None:
     if not is_master() or getattr(task, "test_only", False):
         return
     if not PathManager.exists(self.checkpoint_folder):
         err_msg = "Checkpoint folder '{}' does not exist.".format(
             self.checkpoint_folder)
         raise FileNotFoundError(err_msg)
Example #15
def get_data_files(split, args):
    data_dir = f"{args.data_source_dir}/ImageSets/Main"
    assert PathManager.exists(data_dir), "Data: {} doesn't exist".format(
        data_dir)
    test_data_files = glob(os.path.join(data_dir, "*_test.txt"))
    test_data_files = validate_files(test_data_files)
    if args.separate_partitions > 0:
        train_data_files = glob(os.path.join(data_dir, "*_train.txt"))
        val_data_files = glob(os.path.join(data_dir, "*_val.txt"))
        train_data_files = validate_files(train_data_files)
        val_data_files = validate_files(val_data_files)
        assert len(train_data_files) == len(val_data_files)
        if split == "train":
            data_files = train_data_files
        elif split == "test":
            data_files = test_data_files
        else:
            data_files = val_data_files
    else:
        train_data_files = glob(os.path.join(data_dir, "*_trainval.txt"))
        if len(test_data_files) == 0:
            # For VOC2012 dataset, we have trainval, val and train data.
            train_data_files = glob(os.path.join(data_dir, "*_train.txt"))
            test_data_files = glob(os.path.join(data_dir, "*_val.txt"))
        test_data_files = validate_files(test_data_files)
        train_data_files = validate_files(train_data_files)
        data_files = train_data_files if (split
                                          == "train") else test_data_files
    assert len(train_data_files) == len(test_data_files), "Missing classes"
    return data_files
Example #16
def get_coco_imgs_labels_info(split, data_source_dir, args):
    # pycocotools is an optional dependency for VISSL
    from pycocotools.coco import COCO

    json_file = f"{data_source_dir}/annotations/instances_{split}2014.json"
    assert PathManager.exists(
        json_file), "Annotations file does not exist. Abort"
    json_data = json.load(PathManager.open(json_file, "r"))
    image_index = [x["id"] for x in json_data["images"]]
    coco = COCO(json_file)

    num_cats = len(json_data["categories"])
    logging.info("partition: {} num_cats: {} num_images: {}".format(
        split, num_cats, len(image_index)))
    cat_ids = [x["id"] for x in json_data["categories"]]
    coco_to_me = {val: ind for ind, val in enumerate(cat_ids)}
    cat_names = [str(x["name"]) for x in json_data["categories"]]
    cat_name_to_id, cat_id_to_name = {}, {}
    for ind, name in enumerate(cat_names):
        cat_name_to_id[name] = ind
        cat_id_to_name[ind] = name

    class_ids = cat_id_to_name.keys()
    assert len(list(class_ids)) == num_cats
    assert min(class_ids) == 0
    assert max(class_ids) == len(class_ids) - 1
    assert len(set(class_ids)) == len(class_ids)
    # label_matrix = np.zeros((len(image_index), len(cat_names)), dtype=np.float32)
    # area_matrix = np.zeros((len(image_index), len(cat_names)), dtype=np.float32)
    img_labels_map = {}
    num_classes = len(cat_names)
    for _, im_id in enumerate(image_index):
        ann_ids = coco.getAnnIds(imgIds=im_id)
        entry = coco.imgs[im_id]
        img_name = entry["file_name"]
        objs = coco.loadAnns(ann_ids)
        valid_objs = get_valid_objs(entry, objs)
        if img_name not in img_labels_map:
            img_labels_map[img_name] = np.zeros(num_classes, dtype=np.int32)
        for _, obj in enumerate(valid_objs):
            cocoCatId = obj["category_id"]
            myId = coco_to_me[cocoCatId]
            img_labels_map[img_name][myId] = 1.0

    # label = 1 (present), 0 (not present)
    img_paths, img_labels = [], []
    train_imgs_path = f"{data_source_dir}/train2014"
    val_imgs_path = f"{data_source_dir}/val2014"
    prefix = train_imgs_path if split == "train" else val_imgs_path
    for item in sorted(img_labels_map.keys()):
        img_paths.append(f"{prefix}/{item}")
        img_labels.append(img_labels_map[item])

    # save to the datasets folder and return the path
    output_dir = get_output_dir()
    img_info_out_path = f"{output_dir}/{split}_images.npy"
    label_info_out_path = f"{output_dir}/{split}_labels.npy"
    save_file(np.array(img_paths), img_info_out_path)
    save_file(np.array(img_labels), label_info_out_path)
    return [img_info_out_path, label_info_out_path]
Example #17
    def merge(self):
        """merge all clip features of a video into one/several 
           fix-size matrix(es)
        """
        if not PathManager.exists(self.merge_dir):
            PathManager.mkdirs(self.merge_dir)

        for video_name in PathManager.ls(self.save_dir):
            video_dir = os.path.join(self.save_dir, video_name)
            num_feats = len(PathManager.ls(video_dir))

            if self.min_length <= num_feats <= self.max_length:
                merged_feat = torch.zeros((num_feats, self.dim),
                                          dtype=torch.float32)

                for clip_idx in range(num_feats):
                    feat = torch.load(
                        os.path.join(video_dir, f'{clip_idx}.pth'))
                    merged_feat[clip_idx, :] = torch.from_numpy(feat)

                torch.save(merged_feat,
                           os.path.join(self.merge_dir, f'{video_name}.pth'))
            else:
                # TODO
                print(video_name)
Example #18
 def process_train_image(i, out_dir):
     if i % LOG_FREQUENCY == 0:
         logging.info(f"Train Image: {i}"),
     fname_out = f"{out_dir}/{i}.npy"
     if PathManager.exists(fname_out):
         feat = load_file(fname_out)
         train_features.append(feat)
     else:
         fname_in = train_dataset.get_filename(i)
         if is_revisited_dataset(train_dataset_name):
             img = image_helper.load_and_prepare_revisited_image(fname_in)
         elif is_whiten_dataset(train_dataset_name):
             img = image_helper.load_and_prepare_whitening_image(fname_in)
         else:
             img = image_helper.load_and_prepare_image(fname_in, roi=None)
         v = torch.autograd.Variable(img.unsqueeze(0))
         vc = v.cuda()
         # the model output is always a list.
         activation_map = model(vc)[0].cpu()
         # once we have the features,
         # we can perform: rmac | gem pooling | l2 norm
         if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "rmac":
             descriptors = get_rmac_descriptors(activation_map,
                                                spatial_levels)
         else:
             descriptors = activation_map
         save_file(descriptors.data.numpy(), fname_out)
         train_features.append(descriptors.data.numpy())
Example #19
def get_train_dataset(cfg, root_dataset_path, train_dataset_name,
                      eval_binary_path):
    # We only create the train dataset if PCA or whitening training is needed.
    if cfg.IMG_RETRIEVAL.SHOULD_TRAIN_PCA_OR_WHITENING:
        train_data_path = f"{root_dataset_path}/{train_dataset_name}"
        assert PathManager.exists(
            train_data_path), f"Unknown path: {train_data_path}"

        num_samples = 10 if cfg.IMG_RETRIEVAL.DEBUG_MODE else None

        if is_revisited_dataset(train_dataset_name):
            train_dataset = RevisitedInstanceRetrievalDataset(
                train_dataset_name, root_dataset_path)
        elif is_whiten_dataset(train_dataset_name):
            train_dataset = WhiteningTrainingImageDataset(
                train_data_path,
                cfg.IMG_RETRIEVAL.WHITEN_IMG_LIST,
                num_samples=num_samples,
            )
        else:
            train_dataset = InstanceRetrievalDataset(train_data_path,
                                                     eval_binary_path,
                                                     num_samples=num_samples)
    else:
        train_dataset = None
    return train_dataset
Example #20
def convert_and_save_model(args, replace_prefix):
    assert PathManager.exists(
        args.output_dir), "Output directory does NOT exist"

    # load the model
    model_path = args.model_url_or_file
    if is_url(model_path):
        logger.info(f"Loading from url: {model_path}")
        model = load_state_dict_from_url(model_path)
    else:
        model = torch.load(model_path, map_location=torch.device("cpu"))

    # get the model trunk to rename
    if "classy_state_dict" in model.keys():
        model_trunk = model["classy_state_dict"]["base_model"]["model"][
            "trunk"]
    elif "model_state_dict" in model.keys():
        model_trunk = model["model_state_dict"]
    else:
        model_trunk = model
    logger.info(
        f"Input model loaded. Number of params: {len(model_trunk.keys())}")

    # convert the trunk
    converted_model = replace_module_prefix(model_trunk, "_feature_blocks.")
    logger.info(
        f"Converted model. Number of params: {len(converted_model.keys())}")

    # save the state
    output_filename = f"converted_vissl_{args.output_name}.torch"
    output_model_filepath = f"{args.output_dir}/{output_filename}"
    logger.info(f"Saving model: {output_model_filepath}")
    torch.save(converted_model, output_model_filepath)
    logger.info("DONE!")
Example #21
    def _restore_model_weights(self, model):
        """
        If using a weights file to initialize the model, we load the weights
        and initialize the model. Since the weights file specified by the user
        might not be VISSL-trained weights, we expose several config options
        (APPEND_PREFIX, etc.) to allow the weights to be loaded successfully.
        See MODEL.WEIGHTS_INIT description in vissl/config/defaults.yaml for details.
        """
        params_from_file = self.config["MODEL"]["WEIGHTS_INIT"]
        init_weights_path = params_from_file["PARAMS_FILE"]
        assert init_weights_path, "Shouldn't call this when init_weight_path is empty"
        logging.info(f"Initializing model from: {init_weights_path}")

        if PathManager.exists(init_weights_path):
            weights = load_and_broadcast_checkpoint(init_weights_path,
                                                    device=torch.device("cpu"))
            skip_layers = params_from_file.get("SKIP_LAYERS", [])
            replace_prefix = params_from_file.get("REMOVE_PREFIX", None)
            append_prefix = params_from_file.get("APPEND_PREFIX", None)
            state_dict_key_name = params_from_file.get("STATE_DICT_KEY_NAME",
                                                       None)

            # we initialize the weights from this checkpoint. However, we
            # don't care about the other metadata like iteration number etc.
            # So the method only reads the state_dict
            init_model_from_weights(
                self.config,
                model,
                weights,
                state_dict_key_name=state_dict_key_name,
                skip_layers=skip_layers,
                replace_prefix=replace_prefix,
                append_prefix=append_prefix,
            )
        return model
Example #22
    def _construct_loader(self):
        """
        Construct the video loader.
        """
        path_to_file = os.path.join(
            self.cfg.DATA.PATH_TO_DATA_DIR,
            "{}.csv".format("train" if self.mode == "train" else "val"),
        )
        assert PathManager.exists(path_to_file), "{} dir not found".format(
            path_to_file)
        (self._path_to_videos,
         self._labels) = utils.load_image_lists(path_to_file,
                                                self.cfg.DATA.PATH_PREFIX,
                                                return_list=True)

        if self.mode != "train":
            # Form video-level labels from frame level annotations.
            self._labels = utils.convert_to_video_level_labels(self._labels)

        self._path_to_videos = list(
            chain.from_iterable([[x] * self._num_clips
                                 for x in self._path_to_videos]))
        self._labels = list(
            chain.from_iterable([[x] * self._num_clips for x in self._labels]))
        self._spatial_temporal_idx = list(
            chain.from_iterable(
                [range(self._num_clips) for _ in range(len(self._labels))]))

        logger.info(
            "Charades dataloader constructed (size: {}) from {}".format(
                len(self._path_to_videos), path_to_file))
Example #23
 def has_checkpoint(self) -> bool:
     """
     Returns:
         bool: whether a checkpoint exists in the target directory.
     """
     save_file = os.path.join(self.save_dir, "last_checkpoint")
     return PathManager.exists(save_file)
Example #24
    def _construct_loader(self):
        """
        Construct the video loader.
        """
        path_to_file = os.path.join(self.cfg.DATA.PATH_TO_DATA_DIR,
                                    "{}.txt".format(self.mode))
        assert PathManager.exists(path_to_file), "{} dir not found".format(
            path_to_file)

        self._path_to_videos = []
        self._labels = []
        self._duration = []
        self._spatial_temporal_idx = []
        with PathManager.open(path_to_file, "r") as f:
            for clip_idx, path_label in enumerate(f.read().splitlines()):
                path, start, end, label = path_label.split()
                for idx in range(self._num_clips):
                    self._path_to_videos.append(
                        os.path.join(self.cfg.DATA.PATH_PREFIX, path))
                    self._labels.append(int(label))
                    self._duration.append((float(start), float(end)))
                    self._spatial_temporal_idx.append(idx)
        assert (len(self._path_to_videos) >
                0), "Failed to load Alimedia split {} from {}".format(
                    self._split_idx, path_to_file)
        logger.info(
            "Constructing Alimedia dataloader (size: {}) from {}".format(
                len(self._path_to_videos), path_to_file))
        self._path_to_videos = np.array(self._path_to_videos, dtype=np.string_)
Example #25
 def _evaluate_checkpoints(self):
     for checkpoint_str, benchmarks in self.evaluation_results.items():
         # TODO: Can we possibly retrieve this from CheckpointWriter, to consolidate logic?
         checkpoint_str = os.path.join(self.training_config.CHECKPOINT.DIR,
                                       f"{checkpoint_str}.torch")
         if PathManager.exists(checkpoint_str):
             self._evaluate_checkpoint(checkpoint_str, benchmarks)
Example #26
File: utils.py Project: ys-0-sy/translate
def load_monolingual_dataset(
    bin_path,
    is_source=False,
    char_source_dict=None,
    log_verbose=True,
    num_examples_limit: Optional[int] = None,
):
    if log_verbose:
        print("Starting to load binarized monolingual data file.", flush=True)

    if not PathManager.exists(bin_path):
        raise ValueError(f"Monolingual binary path {bin_path} not found!")

    if char_source_dict is not None and is_source:
        dataset = char_data.InMemoryNumpyWordCharDataset.create_from_file(
            path=bin_path)

    else:
        dataset = pytorch_translate_data.InMemoryIndexedDataset.create_from_file(
            path=bin_path, num_examples_limit=num_examples_limit)

    if log_verbose:
        print(f"Finished loading dataset {bin_path}", flush=True)

    print(f"""| Loaded {len(dataset)} monolingual examples for """
          f"""{"source" if is_source else "target"}""")
    return dataset
Example #27
def convert_to_coco_json(dataset_name, output_file, allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's standard format.

    Args:
        dataset_name:
            reference from the config file to the catalogs
            must be registered in DatasetCatalog and in detectron2's standard format
        output_file: path of json file that will be saved to
        allow_cached: if json file is already present then skip conversion
    """

    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data

    PathManager.mkdirs(os.path.dirname(output_file))
    with file_lock(output_file):
        if PathManager.exists(output_file) and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
        else:
            logger.info(
                f"Converting annotations of dataset '{dataset_name}' to COCO format ...)"
            )
            coco_dict = convert_to_coco_dict(dataset_name)

            logger.info(
                f"Caching COCO format annotations at '{output_file}' ...")
            with PathManager.open(output_file, "w") as f:
                json.dump(coco_dict, f)
Example #28
def get_local_path(input_file, dest_dir):
    """
    If the user specified copying data to a local directory,
    get the local path where the data files were copied.

    - If input_file is just a file, we return dest_dir/filename.
    - If input_file is a directory, we check whether the environment is SLURM
      (using slurm_dir in that case, dest_dir otherwise) and look for a
      copy_complete file. If it is present, we return that directory.
    - If both of the above fail, we return input_file as is.
    """
    out = ""
    if PathManager.isfile(input_file):
        out = os.path.join(dest_dir, os.path.basename(input_file))
    elif PathManager.isdir(input_file):
        data_name = input_file.strip("/").split("/")[-1]
        if "SLURM_JOBID" in os.environ:
            dest_dir = get_slurm_dir(dest_dir)
        dest_dir = os.path.join(dest_dir, data_name)
        complete_flag = os.path.join(dest_dir, "copy_complete")
        if PathManager.isfile(complete_flag):
            out = dest_dir
    if PathManager.exists(out):
        return out
    else:
        return input_file
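A usage sketch for get_local_path, assuming it is importable; the dataset path and scratch directory are hypothetical.

# If "/datasets/imagenet" was copied to local scratch and a copy_complete marker
# exists there, the local copy is returned; otherwise the original path comes back.
local_path = get_local_path("/datasets/imagenet", dest_dir="/scratch/local")
print(local_path)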
Example #29
    def _construct_loader(self):
        """
        Construct the video loader.
        """
        path_to_file = os.path.join(self.cfg.DATA.PATH_TO_DATA_DIR,
                                    "{}.csv".format(self.mode))
        assert PathManager.exists(path_to_file), "{} dir not found".format(
            path_to_file)

        self._path_to_videos = []
        self._labels = []
        self._spatial_temporal_idx = []
        with PathManager.open(path_to_file, "r") as f:
            for clip_idx, path_label in enumerate(f.read().splitlines()):
                assert (len(
                    path_label.split(self.cfg.DATA.PATH_LABEL_SEPARATOR)) == 2)
                path, label = path_label.split(
                    self.cfg.DATA.PATH_LABEL_SEPARATOR)
                for idx in range(self._num_clips):
                    self._path_to_videos.append(
                        os.path.join(self.cfg.DATA.PATH_PREFIX, path))
                    self._labels.append(int(label))
                    self._spatial_temporal_idx.append(idx)
                    self._video_meta[clip_idx * self._num_clips + idx] = {}
        assert (len(self._path_to_videos) >
                0), "Failed to load Kinetics split {} from {}".format(
                    self._split_idx, path_to_file)
        logger.info(
            "Constructing kinetics dataloader (size: {}) from {}".format(
                len(self._path_to_videos), path_to_file))
Example #30
def main():
    parser = argparse.ArgumentParser(
        description="Sample Low-shot data for Places/VOC")
    parser.add_argument(
        "--dataset_name",
        type=str,
        default=None,
        help=
        "choose between places | voc. These are valid choices if your dataset is similar",
    )
    parser.add_argument(
        "--layername",
        type=str,
        default=None,
        help="Layer for which low shot is being general. Valid for voc07 only",
    )
    parser.add_argument(
        "--targets_data_file",
        type=str,
        default=None,
        help="Numpy file containing image labels",
    )
    parser.add_argument(
        "--images_data_file",
        type=str,
        default=None,
        help="Numpy file containing images information",
    )
    parser.add_argument(
        "--output_path",
        type=str,
        default=None,
        help="path where low-shot samples should be saved",
    )
    parser.add_argument(
        "--k_values",
        type=str,
        default="1,2,4,8,16,32,64,96",
        help="Low-shot k-values for svm testing.",
    )
    parser.add_argument("--num_samples",
                        type=int,
                        default=5,
                        help="Number of independent samples.")
    opts = parser.parse_args()

    assert PathManager.exists(
        opts.targets_data_file), "Target file not found. Abort"
    targets = load_file(opts.targets_data_file)
    sample_ids = list(range(1, 1 + opts.num_samples))

    generate_low_shot_samples(
        opts.dataset_name,
        targets,
        opts.k_values,
        sample_ids,
        opts.output_path,
        opts.layername,
        opts.images_data_file,
    )