def train_cls(self, features, targets, cls_num):
    """
    Train one linear SVM per cost value for a single class.

    For every cost in ``self.costs_list`` a ``LinearSVC`` is cross-validated
    (average precision), fitted on the full input and pickled to disk. The
    mean cross-validation AP is stored in ``self.train_ap_matrix`` and saved
    alongside the model. A cost is skipped when both output files already
    exist and ``self.config.force_retrain`` is falsy.

    Args:
        features: training features, handed to scikit-learn unchanged.
        targets: 2-D label array; column ``cls_num`` holds the labels of this
            class (0 = not present, 1 = present).
        cls_num: index of the class to train.
    """
    logging.info(f"Training cls: {cls_num}")
    for cost_idx, cost in enumerate(self.costs_list):
        out_file, ap_out_file = self._get_svm_model_filename(cls_num, cost)
        outputs_exist = g_pathmgr.exists(out_file) and g_pathmgr.exists(
            ap_out_file
        )
        if outputs_exist and not self.config.force_retrain:
            logging.info(f"SVM model exists: {out_file}")
            logging.info(f"AP file exists: {ap_out_file}")
            continue

        logging.info(f"Training model with the cost: {cost} cls: {cls_num}")
        svm_params = {
            "C": cost,
            "class_weight": {1: 2, -1: 1},
            "intercept_scaling": 1.0,
            "verbose": 1,
            "penalty": self.config["penalty"],
            "loss": self.config["loss"],
            "tol": 0.0001,
            "dual": self.config["dual"],
            "max_iter": self.config["max_iter"],
        }
        clf = LinearSVC(**svm_params)

        # The loaded VOC/COCO targets use 0 = not present and 1 = present.
        # The SVM is trained on -1 / 1, so remap the zeros on a private copy.
        cls_labels = targets[:, cls_num].astype(dtype=np.int32, copy=True)
        cls_labels[cls_labels == 0] = -1
        num_positives = int(np.count_nonzero(cls_labels == 1))
        num_negatives = len(cls_labels) - num_positives
        logging.info(
            f"cls: {cls_num} has +ve: {num_positives} -ve: {num_negatives} "
            f"ratio: {float(num_positives) / num_negatives} "
            f"features: {features.shape} cls_labels: {cls_labels.shape}"
        )

        ap_scores = cross_val_score(
            clf,
            features,
            cls_labels,
            cv=self.config["cross_val_folds"],
            scoring="average_precision",
        )
        mean_ap = ap_scores.mean()
        self.train_ap_matrix[cls_num][cost_idx] = mean_ap
        clf.fit(features, cls_labels)

        logging.info(
            f"cls: {cls_num} cost: {cost} AP: {ap_scores} " f"mean:{mean_ap}"
        )
        logging.info(f"Saving cls cost AP to: {ap_out_file}")
        save_file(np.array([mean_ap]), ap_out_file)
        logging.info(f"Saving SVM model to: {out_file}")
        with g_pathmgr.open(out_file, "wb") as model_stream:
            pickle.dump(clf, model_stream)
def main():
    """
    Command-line entry point that builds the iNaturalist2018 data files.

    Parses the input/output directories, optionally downloads the dataset,
    extracts image and label information for the train and val splits, and
    saves them as ``{train,val}_{images,labels}.npy`` in the output directory.
    """
    parser = argparse.ArgumentParser(
        description="Create the iNaturalist2018 data information file."
    )
    parser.add_argument(
        "-i",
        "--input_dir_path",
        type=str,
        help="Path to the parent directory of the iNaturalist2018 data set",
    )
    parser.add_argument(
        "-o",
        "--output_dir_path",
        type=str,
        help="Folder where the classification dataset will be written",
    )
    parser.add_argument(
        "-d",
        "--download",
        action="store_const",
        const=True,
        default=False,
        help="To download the original dataset and decompress it in the input folder",
    )
    args = parser.parse_args()
    input_dir = args.input_dir_path
    output_dir = args.output_dir_path

    # Both directories must already exist; nothing is created here.
    assert g_pathmgr.exists(
        input_dir
    ), "Data input directory not found! Please create the directory"
    assert g_pathmgr.exists(
        output_dir
    ), "Data output directory not found! Please create the directory"

    # Download dataset to input path
    if args.download:
        download_dataset(input_dir)

    # Process both splits first, then write everything out.
    prepared = {}
    for split in ("train", "val"):
        logger.info(f"========Preparing {split} data files========")
        images, labels = get_images_labels_info(f"/{split}2018.json", input_dir)
        prepared[split] = (images, labels)

    for split, (images, labels) in prepared.items():
        logger.info(
            f"=================Saving {split} data files======================="
        )
        save_file(images, f"{output_dir}/{split}_images.npy")
        save_file(labels, f"{output_dir}/{split}_labels.npy")
def _build_model(self, strict_load: bool = False):
    """
    Build and return the model used for the task.

    - The model is instantiated from the MODEL and OPTIMIZER config sections.
      It is neither moved to gpu nor wrapped with DDP here; per the original
      notes this is done later by self.prepare().
    - BatchNorm layers are converted to SyncBatchNorm when the config asks
      for it and the device is "gpu".
    - In feature-evaluation mode either the trunk, or the trunk and the head,
      are frozen depending on the config.
    - When there is no checkpoint to resume from and a PARAMS_FILE is
      configured, the model weights are restored from it.

    Args:
        strict_load: forwarded as ``strict`` to ``_restore_model_weights``.
    """
    logging.info("Building model....")

    # Instantiate the raw model as specified
    model = build_model(self.config["MODEL"], self.config["OPTIMIZER"])
    model_cfg = self.config["MODEL"]

    # Both Apex and PyTorch SyncBatchNorms are GPU only.
    convert_bn = model_cfg["SYNC_BN_CONFIG"]["CONVERT_BN_TO_SYNC_BN"]
    if convert_bn and self.config["MACHINE"]["DEVICE"] == "gpu":
        model = convert_sync_bn(self.config, model)

    # Enforce eval mode, no matter what the prior transforms have done.
    # For instance apex converts batch-norms and sets `requires_grad` to True
    eval_cfg = model_cfg["FEATURE_EVAL_SETTINGS"]
    if eval_cfg["EVAL_MODE_ON"]:
        if eval_cfg["FREEZE_TRUNK_ONLY"]:
            logging.info(
                "config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_ONLY=True, "
                "will freeze trunk..."
            )
            model.freeze_trunk()
        elif eval_cfg["FREEZE_TRUNK_AND_HEAD"]:
            logging.info(
                "config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_AND_HEAD=True, will "
                "freeze trunk and head..."
            )
            model.freeze_head_and_trunk()

    # Weights are only initialized from PARAMS_FILE when no checkpoint exists.
    # This matters when training died and is being resumed.
    if self.checkpoint_path is None:
        params_file = model_cfg["WEIGHTS_INIT"]["PARAMS_FILE"]
        if params_file:
            # A user-specified PARAMS_FILE must exist.
            assert g_pathmgr.exists(
                params_file
            ), "Specified PARAMS_FILE does NOT exist"
            # Re-checked on purpose: the assert above vanishes under `-O`.
            if g_pathmgr.exists(params_file):
                model = self._restore_model_weights(model, strict=strict_load)

    return model
def check_data_exists(data_files):
    """
    Tell whether the input data file(s) exist.

    A single path is checked directly. For a list, every entry is checked and
    the result is true only if all of them exist.
    """
    if not isinstance(data_files, list):
        return g_pathmgr.exists(data_files)
    presence = [g_pathmgr.exists(path) for path in data_files]
    return np.all(presence)
def save_img_labels_filelist(
    img_paths, img_labels, out_image_filepath, out_label_filepath
):
    """
    Save the image paths and the image labels to their output files.

    An output file that already exists is removed first and then re-saved.
    The image file list is written before the label file list.
    """
    outputs = (
        (img_paths, out_image_filepath),
        (img_labels, out_label_filepath),
    )
    for payload, destination in outputs:
        # Remove the stale .npy filelist, if any, before saving again.
        if g_pathmgr.exists(destination):
            g_pathmgr.rm(destination)
        save_file(payload, destination)
        print(f"Saved: {destination}")
    print("Saved!!")
def load_input_data(self, data_file, targets_file):
    """
    Load the features and the targets (labels) from the given files.

    The features are converted to a float64 numpy array. Both inputs must
    have the same first dimension (number of images).

    Returns:
        (features, targets)
    """
    for path, description in ((data_file, "Data"), (targets_file, "Targets")):
        assert g_pathmgr.exists(path), f"{description} file not found. Abort!"

    logging.info("loading features and targets...")
    # Targets are read before features, as in the original load order.
    targets = load_file(targets_file)
    raw_features = load_file(data_file)
    features = np.array(raw_features).astype(np.float64)

    assert features.shape[0] == targets.shape[0], "Mismatched #images"
    logging.info(
        f"Loaded features: {features.shape} and targets: {targets.shape}"
    )
    return features, targets
def create_submitit_executor(cfg: AttrDict):
    """
    Create a submitit executor configured from the SLURM section of the
    given VISSL configuration.

    The SLURM log folder is created if needed, and a SLURM partition must be
    set. One task is requested per node, with as many GPUs per node as
    ``cfg.DISTRIBUTED.NUM_PROC_PER_NODE``.

    Returns:
        the configured ``submitit.AutoExecutor``.
    """
    import submitit

    slurm_cfg = cfg.SLURM
    log_folder = slurm_cfg.LOG_FOLDER
    makedir(log_folder)
    assert g_pathmgr.exists(
        log_folder
    ), f"Specified config.SLURM.LOG_FOLDER={log_folder} doesn't exist"
    assert slurm_cfg.PARTITION, "SLURM.PARTITION must be set when using SLURM"

    executor = submitit.AutoExecutor(folder=log_folder)
    procs_per_node = cfg.DISTRIBUTED.NUM_PROC_PER_NODE
    parameters = {
        "name": slurm_cfg.NAME,
        "slurm_comment": slurm_cfg.COMMENT,
        "slurm_partition": slurm_cfg.PARTITION,
        "slurm_constraint": slurm_cfg.CONSTRAINT,
        # submitit takes the timeout in minutes.
        "timeout_min": slurm_cfg.TIME_HOURS * 60 + slurm_cfg.TIME_MINUTES,
        "nodes": cfg.DISTRIBUTED.NUM_NODES,
        # A single task per node drives every process on that node.
        "cpus_per_task": slurm_cfg.NUM_CPU_PER_PROC * procs_per_node,
        "tasks_per_node": 1,
        "gpus_per_node": procs_per_node,
        "mem_gb": slurm_cfg.MEM_GB,
        "slurm_additional_parameters": slurm_cfg.ADDITIONAL_PARAMETERS,
    }
    executor.update_parameters(**parameters)
    return executor
def _evaluate_checkpoints(self):
    """
    Evaluate every tracked checkpoint whose ``.torch`` file already exists in
    the training checkpoint directory.
    """
    checkpoint_entries = self.evaluation_results.items()
    for checkpoint_name, benchmarks in checkpoint_entries:
        # TODO: Can we possibly retrieve this from CheckpointWriter, to consolidate logic.
        checkpoint_path = os.path.join(
            self.training_config.CHECKPOINT.DIR, f"{checkpoint_name}.torch"
        )
        if not g_pathmgr.exists(checkpoint_path):
            continue
        self._evaluate_checkpoint(checkpoint_path, benchmarks)
def build_retrieval_model(cfg):
    """
    Build the model and, when the configured weights file exists, initialize
    it from those weights (loaded onto the "cuda" device).

    Without a weights file the model stays randomly initialized and only a
    warning is logged, so that random initialization can be benchmarked too.
    """
    logging.info("Building model....")
    model = build_model(cfg.MODEL, cfg.OPTIMIZER)

    if not g_pathmgr.exists(cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE):
        # Only warn: benchmarking a random initialization is supported.
        logging.warning("Model is randomly initialized....")
    else:
        weights_cfg = cfg.MODEL.WEIGHTS_INIT
        init_weights_path = weights_cfg.PARAMS_FILE
        logging.info(f"Initializing model from: {init_weights_path}")
        weights = load_checkpoint(init_weights_path, device=torch.device("cuda"))
        init_model_from_consolidated_weights(
            cfg,
            model,
            weights,
            state_dict_key_name=weights_cfg.get("STATE_DICT_KEY_NAME", None),
            skip_layers=weights_cfg.get("SKIP_LAYERS", []),
            replace_prefix=weights_cfg.get("REMOVE_PREFIX", None),
            append_prefix=weights_cfg.get("APPEND_PREFIX", None),
        )

    logging.info(f"Model is:\n {model}")
    return model
def get_local_path(input_file, dest_dir):
    """
    Resolve the local path of data that was copied to a local directory.

    - If input_file is a file, the candidate is dest_dir/<file name>.
    - If input_file is a directory, the candidate is <base>/<directory name>,
      where <base> is the slurm dir when SLURM_JOBID is set and dest_dir
      otherwise. It is only used when it contains a "copy_complete" file.
    - If the candidate does not exist, input_file is returned as is.
    """
    candidate = ""
    if g_pathmgr.isfile(input_file):
        candidate = os.path.join(dest_dir, os.path.basename(input_file))
    elif g_pathmgr.isdir(input_file):
        data_name = input_file.strip("/").rsplit("/", 1)[-1]
        base_dir = (
            get_slurm_dir(dest_dir) if "SLURM_JOBID" in os.environ else dest_dir
        )
        local_copy = os.path.join(base_dir, data_name)
        # The flag file marks a copy that ran to completion.
        if g_pathmgr.isfile(os.path.join(local_copy, "copy_complete")):
            candidate = local_copy

    return candidate if g_pathmgr.exists(candidate) else input_file
def get_data_files(split, args):
    """
    Collect the per-class image-set files of the requested split from
    ``<data_source_dir>/ImageSets/Main``.

    With ``args.separate_partitions > 0`` the train, val and test files are
    kept apart and ``split`` selects among them (anything other than "train"
    or "test" selects val). Otherwise train comes from the ``*_trainval.txt``
    files and test from ``*_test.txt``; when there are no test files, train
    falls back to ``*_train.txt`` and test to ``*_val.txt``.
    """
    data_dir = f"{args.data_source_dir}/ImageSets/Main"
    assert g_pathmgr.exists(data_dir), "Data: {} doesn't exist".format(data_dir)

    def find(pattern):
        # Unvalidated glob of the image-set files matching `pattern`.
        return glob(os.path.join(data_dir, pattern))

    test_data_files = validate_files(find("*_test.txt"))

    if args.separate_partitions > 0:
        train_data_files = find("*_train.txt")
        val_data_files = find("*_val.txt")
        train_data_files = validate_files(train_data_files)
        val_data_files = validate_files(val_data_files)
        assert len(train_data_files) == len(val_data_files)
        by_split = {"train": train_data_files, "test": test_data_files}
        return by_split.get(split, val_data_files)

    train_data_files = find("*_trainval.txt")
    if len(test_data_files) == 0:
        # For VOC2012 dataset, we have trainval, val and train data.
        train_data_files = find("*_train.txt")
        test_data_files = find("*_val.txt")
    test_data_files = validate_files(test_data_files)
    train_data_files = validate_files(train_data_files)
    data_files = train_data_files if split == "train" else test_data_files
    assert len(train_data_files) == len(test_data_files), "Missing classes"
    return data_files
def launch_benchmark_suite_scheduler(config_file):
    """
    Submit a benchmark suite scheduler job to SLURM through submitit.

    The user config loaded from ``config_file`` is merged on top of the
    module defaults. Its "params" section is used to build the
    ``BenchmarkSuiteScheduler``, and its "slurm_options" section (which must
    contain PARTITION) configures the single-node job.

    Args:
        config_file: path of the evaluator config file; it must exist.
    """
    import copy

    assert g_pathmgr.exists(config_file), "Slurm evaluator config file must exist"

    user_config = load_file(config_file)
    # Deep copy: a shallow `.copy()` shares the nested dicts with
    # _DEFAULT_CONFIG, so the recursive merge below could write user values
    # into the module-level defaults and leak them into later calls.
    config = copy.deepcopy(_DEFAULT_CONFIG)
    recursive_dict_merge(config, user_config)

    benchmark_suite_scheduler = BenchmarkSuiteScheduler(**config["params"])
    benchmark_suite_scheduler_job = SlurmEvaluatorJob(
        benchmark_suite_scheduler=benchmark_suite_scheduler
    )
    executor = submitit.AutoExecutor(
        folder=benchmark_suite_scheduler.evaluation_dir()
    )

    assert "slurm_options" in config, "slurm_options must be specified"
    assert (
        "PARTITION" in config["slurm_options"]
    ), "slurm_options.PARTITION is a required field to launch the benchmark suite on slurm"

    slurm_options = AttrDict(config["slurm_options"])
    executor.update_parameters(
        name=slurm_options.NAME,
        slurm_comment=slurm_options.COMMENT,
        slurm_partition=slurm_options.PARTITION,
        slurm_constraint=slurm_options.CONSTRAINT,
        timeout_min=slurm_options.TIMEOUT_MIN,
        nodes=1,
        cpus_per_task=slurm_options.CPUS_PER_TASK,
        tasks_per_node=1,
        mem_gb=slurm_options.MEM_GB,
        slurm_additional_parameters=slurm_options.ADDITIONAL_PARAMETERS,
    )

    job = executor.submit(benchmark_suite_scheduler_job)
    print(f"SUBMITTED EVALUATION JOB: {job.job_id}")
def _construct_loader(self):
    """
    Construct the video loader.

    Reads the frame lists of the "train" split (or "val" for any other mode),
    converts the labels to video-level labels outside of training, and then
    repeats every video and label ``self._num_clips`` times.
    """
    split_name = "train" if self.mode == "train" else "val"
    path_to_file = os.path.join(
        self.cfg.DATA.PATH_TO_DATA_DIR, f"{split_name}.csv"
    )
    assert g_pathmgr.exists(path_to_file), f"{path_to_file} dir not found"

    self._path_to_videos, self._labels = utils.load_image_lists(
        path_to_file, self.cfg.DATA.PATH_PREFIX, return_list=True
    )

    if self.mode != "train":
        # Form video-level labels from frame level annotations.
        self._labels = utils.convert_to_video_level_labels(self._labels)

    num_clips = self._num_clips
    # Each entry is repeated once per clip, keeping the original order.
    self._path_to_videos = [
        video for video in self._path_to_videos for _ in range(num_clips)
    ]
    self._labels = [label for label in self._labels for _ in range(num_clips)]
    # NOTE(review): the length is taken from the already repeated label list,
    # so this index list is num_clips times longer than the video list.
    # Kept as is - confirm whether that is intended.
    self._spatial_temporal_idx = [
        clip_idx
        for _ in range(len(self._labels))
        for clip_idx in range(num_clips)
    ]

    logger.info(
        f"Charades dataloader constructed (size: {len(self._path_to_videos)}) "
        f"from {path_to_file}"
    )
def from_directory(cls, dir_path: str) -> LabeledVideoPaths:
    """
    Factory function that creates a LabeledVideoPaths object by parsing the
    structure of the given directory's subdirectories into the classification
    labels. It expects the directory format to be the following:
         dir_path/<class_name>/<video_name>.mp4

    Classes are indexed from 0 to the number of classes, alphabetically.

    E.g.
        dir_path/class_x/xxx.ext
        dir_path/class_x/xxy.ext
        dir_path/class_x/xxz.ext
        dir_path/class_y/123.ext
        dir_path/class_y/nsdf3.ext
        dir_path/class_y/asd932_.ext

    Would produce two classes labeled 0 and 1 with 3 videos paths associated
    with each.

    Args:
        dir_path (str): Root directory to the video class directories.
    """
    assert g_pathmgr.exists(dir_path), f"{dir_path} not found."

    # Classes are the sub-directory *names*, sorted and indexed from 0.
    # Using the bare name (not the full Path) matters: a full path already
    # contains dir_path, so joining it with dir_path again breaks for
    # relative roots.
    # NOTE(review): assumes make_dataset joins dir_path with each key, as
    # torchvision's make_dataset does - confirm against its definition.
    classes = sorted(
        entry.name for entry in pathlib.Path(dir_path).iterdir() if entry.is_dir()
    )
    class_to_idx = {class_name: idx for idx, class_name in enumerate(classes)}
    video_paths_and_label = make_dataset(
        dir_path, class_to_idx, extensions=("mp4", "avi")
    )
    assert (
        len(video_paths_and_label) > 0
    ), f"Failed to load dataset from {dir_path}."
    return cls(video_paths_and_label)
def get_coco_imgs_labels_info(split, data_source_dir, args):
    """
    Build and save the image paths and multi-label targets of a COCO2014
    split.

    Each image gets an int32 vector with one entry per category, set to 1
    when a valid object of that category is annotated in the image and 0
    otherwise. Images are sorted by file name before saving.

    Args:
        split: split name; "train" selects the train2014 image folder, any
            other value the val2014 one.
        data_source_dir: dataset root holding the annotations and images.
        args: unused here.

    Returns:
        [image paths .npy file, labels .npy file], both written to the
        directory returned by ``get_output_dir()``.
    """
    # pycocotools is an optional dependency for VISSL
    from pycocotools.coco import COCO

    json_file = f"{data_source_dir}/annotations/instances_{split}2014.json"
    assert g_pathmgr.exists(json_file), "Annotations file does not exist. Abort"
    # Close the handle deterministically instead of leaking it.
    with g_pathmgr.open(json_file, "r") as fopen:
        json_data = json.load(fopen)
    image_index = [x["id"] for x in json_data["images"]]
    coco = COCO(json_file)

    num_cats = len(json_data["categories"])
    logging.info(
        "partition: {} num_cats: {} num_images: {}".format(
            split, num_cats, len(image_index)
        )
    )
    # Map the COCO category ids onto contiguous indices 0..num_cats-1.
    cat_ids = [x["id"] for x in json_data["categories"]]
    coco_to_me = {val: ind for ind, val in enumerate(cat_ids)}
    cat_names = [str(x["name"]) for x in json_data["categories"]]
    cat_id_to_name = dict(enumerate(cat_names))

    # Sanity checks on the contiguous class indices.
    class_ids = cat_id_to_name.keys()
    assert len(list(class_ids)) == num_cats
    assert min(class_ids) == 0
    assert max(class_ids) == len(class_ids) - 1
    assert len(set(class_ids)) == len(class_ids)

    img_labels_map = {}
    num_classes = len(cat_names)
    for im_id in image_index:
        ann_ids = coco.getAnnIds(imgIds=im_id)
        entry = coco.imgs[im_id]
        img_name = entry["file_name"]
        objs = coco.loadAnns(ann_ids)
        valid_objs = get_valid_objs(entry, objs)
        if img_name not in img_labels_map:
            img_labels_map[img_name] = np.zeros(num_classes, dtype=np.int32)
        for obj in valid_objs:
            my_id = coco_to_me[obj["category_id"]]
            # label = 1 (present), 0 (not present)
            img_labels_map[img_name][my_id] = 1

    prefix = (
        f"{data_source_dir}/train2014"
        if split == "train"
        else f"{data_source_dir}/val2014"
    )
    img_paths, img_labels = [], []
    for item in sorted(img_labels_map):
        img_paths.append(f"{prefix}/{item}")
        img_labels.append(img_labels_map[item])

    # save to the datasets folder and return the path
    output_dir = get_output_dir()
    img_info_out_path = f"{output_dir}/{split}_images.npy"
    label_info_out_path = f"{output_dir}/{split}_labels.npy"
    save_file(np.array(img_paths), img_info_out_path)
    save_file(np.array(img_labels), label_info_out_path)
    return [img_info_out_path, label_info_out_path]
def get_images_labels_info(split, args):
    """
    List the images of a split stored as ``<data_source_dir>/<split>/<class>/``
    folders.

    Returns:
        (image_paths, image_classes, output_dict), where output_dict maps
        "<class>/<file>" ids to labels and is only filled when
        ``args.generate_json`` is set.
    """
    assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort!"

    data_dir = f"{args.data_source_dir}/{split}"
    class_idx = get_all_classes(data_dir)
    logger.info("Number of classes in {} data: {}".format(split, len(class_idx)))

    image_paths, image_classes, img_ids = [], [], []
    for class_name, class_label in class_idx.items():
        class_dir = f"{data_dir}/{class_name}"
        # get all the images in this dir
        for item in os.listdir(class_dir):
            if item in (".", ".."):
                continue
            image_paths.append(f"{class_dir}/{item}")
            img_ids.append(f"{class_name}/{item}")
            image_classes.append(class_label)

    output_dict = {}
    if args.generate_json:
        output_dict.update(zip(img_ids, image_classes))
    return image_paths, image_classes, output_dict
def from_csv(cls, file_path: str) -> LabeledVideoPaths:
    """
    Factory function that creates a LabeledVideoPaths object by reading a file
    with the following format:
        <path> <integer_label>
        ...
        <path> <integer_label>

    A line holding only a path (e.g. for a test split) gets the label -1.
    Blank lines are ignored.

    Args:
        file_path (str): The path to the file to be read.
    """
    assert g_pathmgr.exists(file_path), f"{file_path} not found."
    video_paths_and_label = []
    with g_pathmgr.open(file_path, "r") as f:
        for path_label in f.read().splitlines():
            line_split = path_label.rsplit(None, 1)
            if not line_split:
                # Blank line: nothing to parse, and unpacking it would raise.
                continue

            # The video path file may not contain labels (e.g. for a test
            # split). We assume this is the case if only 1 path is found and
            # set the label to -1 if so.
            if len(line_split) == 1:
                video_path, label = line_split[0], -1
            else:
                video_path, label = line_split

            # Distinct local name: `file_path` must keep naming the list file
            # for the error message below.
            video_paths_and_label.append((video_path, int(label)))

    assert (
        len(video_paths_and_label) > 0
    ), f"Failed to load dataset from {file_path}."
    return cls(video_paths_and_label)
def _construct_loader(self):
    """
    Construct the video loader.

    Reads ``<PATH_TO_DATA_DIR>/<mode>.csv``, where every line holds a video
    path and an integer label separated by ``PATH_LABEL_SEPARATOR``, and
    registers each video once per clip (``self._num_clips`` times).
    """
    data_cfg = self.cfg.DATA
    path_to_file = os.path.join(data_cfg.PATH_TO_DATA_DIR, f"{self.mode}.csv")
    assert g_pathmgr.exists(path_to_file), f"{path_to_file} dir not found"

    self._path_to_videos = []
    self._labels = []
    self._spatial_temporal_idx = []
    with g_pathmgr.open(path_to_file, "r") as f:
        for clip_idx, path_label in enumerate(f.read().splitlines()):
            fields = path_label.split(self.cfg.DATA.PATH_LABEL_SEPARATOR)
            assert len(fields) == 2
            path, label = fields
            for idx in range(self._num_clips):
                self._path_to_videos.append(
                    os.path.join(self.cfg.DATA.PATH_PREFIX, path)
                )
                self._labels.append(int(label))
                self._spatial_temporal_idx.append(idx)
                # One empty metadata slot per (video, clip) pair.
                self._video_meta[clip_idx * self._num_clips + idx] = {}

    assert (
        len(self._path_to_videos) > 0
    ), f"Failed to load Kinetics split {self._split_idx} from {path_to_file}"
    logger.info(
        f"Constructing kinetics dataloader (size: {len(self._path_to_videos)}) "
        f"from {path_to_file}"
    )
def on_start(self, task) -> None:
    """
    Check that the torchscript folder exists before the task starts.

    The check only runs on the primary process and is skipped when the task
    has a truthy ``test_only`` attribute.

    Raises:
        FileNotFoundError: if ``self.torchscript_folder`` does not exist.
    """
    must_check = is_primary() and not getattr(task, "test_only", False)
    if must_check and not g_pathmgr.exists(self.torchscript_folder):
        raise FileNotFoundError(
            f"Torchscript folder '{self.torchscript_folder}' does not exist."
        )
def _generate_initial_benchmark_results(self):
    """
    Build the initial evaluation results, one entry per training checkpoint
    and benchmark.

    When an evaluator checkpoint is available (explicitly configured, or
    auto-loaded from ``evaluation_metrics.json`` in the evaluation dir), the
    results are loaded from it instead. Otherwise checkpoints are registered
    for the final phase, for every ``evaluation_phase_freq`` phases and for
    every ``evaluation_iter_freq`` iterations, as enabled.
    """
    default_checkpoint = os.path.join(
        self.evaluation_dir(), "evaluation_metrics.json"
    )
    autoload = self.autoload_slurm_evaluator_checkpoint and g_pathmgr.exists(
        default_checkpoint
    )
    if autoload or self.slurm_evaluator_checkpoint:
        return self._load_evaluation_results_checkpoint()

    evaluation_configs = {}
    for benchmark in self.benchmarks:
        # Without an explicit name, fall back to the first config file name.
        config_file_name = os.path.split(benchmark["config_files"][0])[-1]
        evaluation_name = benchmark.get("evaluation_name") or config_file_name
        last_phase = self.training_config.OPTIMIZER.num_epochs - 1

        def register(training_checkpoint):
            # Record one (benchmark, checkpoint) pair in the results.
            self._set_initial_benchmark_result(
                benchmark, training_checkpoint, evaluation_name, evaluation_configs
            )

        # TODO: Can we retrieve this from CheckpointWriter?
        if self.evaluate_final_phase:
            # Evaluate Last phase checkpoint
            register(f"model_final_checkpoint_phase{last_phase}")

        phase_freq = self.evaluation_phase_freq
        if phase_freq > -1:
            # Evaluate every "evaluation_phase_freq" phase checkpoint.
            for epoch in range(phase_freq, last_phase, phase_freq):
                register(f"model_phase{epoch}")

        iter_freq = self.evaluation_iter_freq
        if iter_freq > -1:
            # Evaluate every "evaluation_iter_freq" iteration checkpoints.
            for iteration in range(
                iter_freq, self.max_training_iterations, iter_freq
            ):
                register(f"model_iteration{iteration}")

    return evaluation_configs
def _validate_evaluation_setup(self):
    """
    Assert that the evaluation setup is consistent with the training config.

    Enabled evaluation frequencies (> -1) must be multiples of the matching
    checkpoint frequencies, and the training SLURM log folder and checkpoint
    directory must exist.
    """
    training_cfg = self.training_config

    iter_freq = self.evaluation_iter_freq
    if iter_freq > -1:
        assert (
            iter_freq % training_cfg.CHECKPOINT.CHECKPOINT_ITER_FREQUENCY == 0
        ), "Evaluation iter frequency must evenly divide the checkpoint iter frequency"  # NOQA

    phase_freq = self.evaluation_phase_freq
    if phase_freq > -1:
        assert (
            phase_freq % training_cfg.CHECKPOINT.CHECKPOINT_FREQUENCY == 0
        ), "Evaluation phase frequency must evenly divide the checkpoint phase frequency"  # NOQA

    assert g_pathmgr.exists(
        training_cfg.SLURM.LOG_FOLDER
    ), "Training slurm log folder must exist"
    assert g_pathmgr.exists(
        training_cfg.CHECKPOINT.DIR
    ), "Training slurm checkpoint folder must exist"
def load_checkpoint(checkpoint_file, model, optimizer=None):
    """
    Load the checkpoint from the given file into the model and, when one is
    given, into the optimizer.

    Returns:
        the value stored under "epoch" in the checkpoint.
    """
    assert g_pathmgr.exists(
        checkpoint_file
    ), "Checkpoint '{}' not found".format(checkpoint_file)

    # Always deserialize onto the CPU.
    with g_pathmgr.open(checkpoint_file, "rb") as stream:
        checkpoint = torch.load(stream, map_location="cpu")

    unwrap_model(model).load_state_dict(checkpoint["model_state"])
    if optimizer:
        optimizer.load_state_dict(checkpoint["optimizer_state"])
    return checkpoint["epoch"]
def cleanup_dir(dir):
    """
    Utility for deleting a directory. Useful for cleaning the storage space
    that contains various training artifacts like checkpoints, data etc.

    Nothing happens when the path does not exist. The deletion runs
    ``rm -rf`` and its exit status is not checked.

    Args:
        dir: path of the directory to delete.
    """
    import subprocess

    if g_pathmgr.exists(dir):
        logging.info(f"Deleting directory: {dir}")
        # Argument-list form: the path is passed to `rm` verbatim rather than
        # through a shell, so spaces or shell metacharacters in it can
        # neither split the path nor inject commands. `--` stops a path that
        # starts with "-" from being parsed as an option.
        subprocess.run(["rm", "-rf", "--", str(dir)], check=False)
        logging.info(f"Deleted contents of directory: {dir}")
def has_checkpoint(path_to_job):
    """
    Determines if the checkpoint directory of the given job holds an entry
    whose name contains "checkpoint".

    Args:
        path_to_job (string): the path to the folder of the current job.
    """
    checkpoint_dir = get_checkpoint_dir(path_to_job)
    if not g_pathmgr.exists(checkpoint_dir):
        return False
    return any("checkpoint" in name for name in g_pathmgr.ls(checkpoint_dir))
def train(self, features, targets, sample_num, low_shot_kvalue):
    """
    Train the low-shot SVMs for one (sample number, k-value) combination.

    One ``LinearSVC`` is trained and pickled per class and per cost value.
    A model whose output file already exists is skipped unless
    ``self.config.force_retrain`` is set.

    Args:
        features: input features; normalized first when ``self.normalize``.
        targets: labels used to pick the classes and the per-class samples.
        sample_num: index of the independent low-shot sample.
        low_shot_kvalue: the low-shot k-value.
    """
    logging.info("Training Low-shot SVM")
    if self.normalize:
        # normalize the features: N x 9216 (example shape)
        features = self._normalize_features(features)

    # get the class lists to train low-shot SVM classifier on
    self.cls_list = self._get_cls_list(targets)
    suffix = f"sample{sample_num}_k{low_shot_kvalue}"
    for cls_num in self.cls_list:
        for cost in self.costs_list:
            out_file = self._get_svm_low_shot_model_filename(
                cls_num, cost, suffix
            )
            if g_pathmgr.exists(out_file) and not self.config.force_retrain:
                logging.info(f"SVM model exists: {out_file}")
                continue

            logging.info(f"Training model with the cost: {cost}")
            svm_params = {
                "C": cost,
                "class_weight": {1: 2, -1: 1},
                "intercept_scaling": 1.0,
                "verbose": 1,
                "penalty": self.config["penalty"],
                "loss": self.config["loss"],
                "tol": 0.0001,
                "dual": self.config["dual"],
                "max_iter": self.config["max_iter"],
            }
            clf = LinearSVC(**svm_params)

            train_feats, train_cls_labels = self._get_cls_feats_labels(
                cls_num, features, targets
            )
            num_positives = int(np.count_nonzero(train_cls_labels == 1))
            num_negatives = int(np.count_nonzero(train_cls_labels == -1))
            logging.info(
                f"cls: {cls_num} has +ve: {num_positives} -ve: {num_negatives} "
                f"ratio: {float(num_positives) / num_negatives} "
                f"features: {train_feats.shape} "
                f"cls_labels: {train_cls_labels.shape}"
            )

            clf.fit(train_feats, train_cls_labels)
            logging.info(f"Saving SVM model to: {out_file}")
            with g_pathmgr.open(out_file, "wb") as model_stream:
                pickle.dump(clf, model_stream)

    logging.info(f"Done training: sample: {sample_num} k-value: {low_shot_kvalue}")
def get_images_labels_info(split, args):
    """
    Build the image paths and per-class label vectors of a VOC split.

    Each sorted data file stands for one class. Every image gets an int32
    vector with one entry per class, using the train-target convention
    1 = present, 0 = not present, -1 = ignore (VOC files use 1 / -1 / 0).

    Returns:
        (img_paths, img_labels, output_dict), where output_dict maps image
        ids to {class name: label} and is only filled when
        ``args.generate_json`` is set.
    """
    assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort"

    data_files = get_data_files(split, args)
    # Sorting the data files also sorts the class names.
    sorted_data_files = sorted(data_files)
    # VOC label -> train target: -1 (not present) -> 0, 0 (ignore) -> -1.
    voc_to_target = {-1: 0, 0: -1}

    # image name -> label vector; classes never seen for an image stay -1.
    img_labels_map = {}
    for cls_num, data_path in enumerate(sorted_data_files):
        with g_pathmgr.open(data_path, "r") as fopen:
            for line in fopen:
                try:
                    img_name, orig_label = line.strip().split()
                    if img_name not in img_labels_map:
                        img_labels_map[img_name] = np.full(
                            len(data_files), -1, dtype=np.int32
                        )
                    orig_label = int(orig_label)
                    img_labels_map[img_name][cls_num] = voc_to_target.get(
                        orig_label, orig_label
                    )
                except Exception:
                    # Best effort: a malformed line is logged and skipped.
                    logger.info(
                        "Error processing: {} data_path: {}".format(line, data_path)
                    )

    sorted_img_names = sorted(img_labels_map.keys())
    img_paths = [
        f"{args.data_source_dir}/JPEGImages/{name}.jpg" for name in sorted_img_names
    ]
    img_labels = [img_labels_map[name] for name in sorted_img_names]

    output_dict = {}
    if args.generate_json:
        # The class name is the file name up to its first "." and "_".
        cls_names = [
            path.split("/")[-1].split(".")[0].split("_")[0]
            for path in sorted_data_files
        ]
        for img_id in sorted_img_names:
            labels = img_labels_map[img_id]
            output_dict[img_id] = {
                name: int(labels[cls_idx]) for cls_idx, name in enumerate(cls_names)
            }
    return img_paths, img_labels, output_dict
def _save_label_cls_idx_map(self, cls_idx_map: Dict[str, int], split: str):
    """
    Save the label-to-index map of a split as JSON in the checkpoint folder.

    Only the process with distributed rank 0 writes the file, and an existing
    file is left untouched.
    """
    _, dist_rank = get_machine_local_and_dist_rank()
    if dist_rank != 0:
        return

    checkpoint_folder = get_checkpoint_folder(self.cfg)
    class_idx_file_path = (
        f"{checkpoint_folder}/{split.lower()}_label_to_index_map.json"
    )
    if g_pathmgr.exists(class_idx_file_path):
        return
    save_file(cls_idx_map, class_idx_file_path, append_to_json=False)
def __init__(self, data_path, split):
    """
    Set up the CIFAR-10 dataset for the given split and load its data.

    Args:
        data_path: location of the dataset; must exist.
        split: either "train" or "test".
    """
    assert g_pathmgr.exists(data_path), f"Data path '{data_path}' not found"
    assert split in ("train", "test"), f"Split '{split}' not supported for cifar"
    logger.info(f"Constructing CIFAR-10 {split}...")
    self._im_size = cfg.TRAIN.IM_SIZE
    self._data_path = data_path
    self._split = split
    self._inputs, self._labels = self._load_data()
def get_queries_features(
    cfg,
    temp_dir,
    eval_dataset_name,
    resize_img,
    spatial_levels,
    image_helper,
    eval_dataset,
    model,
    pca,
):
    """
    Compute the features of the query images and stack them with np.vstack.

    The number of queries is the dataset's, unless
    ``cfg.IMG_RETRIEVAL.NUM_QUERY_SAMPLES`` overrides it (any value other
    than -1). When ``temp_dir`` is set, per-query features are looked up in
    ``<temp_dir>/<eval_dataset_name>_S<resize_img>_q/<idx>.npy`` and loaded
    from there if present.

    Returns:
        the stacked query features.
    """
    features_queries = []
    num_queries = eval_dataset.get_num_query_images()
    if cfg.IMG_RETRIEVAL.NUM_QUERY_SAMPLES != -1:
        num_queries = cfg.IMG_RETRIEVAL.NUM_QUERY_SAMPLES

    logging.info(f"Getting features for queries: {num_queries}")
    # The cache folder is gated on `temp_dir`. The previous guard tested a
    # variable that had just been set to None, so the folder was never
    # created and the cache below was never used.
    q_fname_out_dir = None
    if temp_dir:
        q_fname_out_dir = f"{temp_dir}/{eval_dataset_name}_S{resize_img}_q"
        makedir(q_fname_out_dir)

    for idx in range(num_queries):
        if idx % LOG_FREQUENCY == 0:
            logging.info(f"Eval Query: {idx}")
        q_fname_in = eval_dataset.get_query_filename(idx)
        # Optionally crop the query by the region-of-interest (ROI).
        roi = (
            eval_dataset.get_query_roi(idx)
            if cfg.IMG_RETRIEVAL.CROP_QUERY_ROI
            else None
        )

        q_fname_out = f"{q_fname_out_dir}/{idx}.npy" if q_fname_out_dir else None
        if q_fname_out and g_pathmgr.exists(q_fname_out):
            query_feature = load_file(q_fname_out)
        else:
            # NOTE(review): presumably process_eval_image writes the feature
            # to q_fname_out when it is not None - confirm.
            query_feature = process_eval_image(
                cfg,
                q_fname_in,
                roi,
                q_fname_out,
                spatial_levels,
                image_helper,
                model,
                pca,
                eval_dataset_name,
                verbose=(idx == 0),
            )
        features_queries.append(query_feature)

    features_queries = np.vstack(features_queries)
    logging.info(f"Queries Features Size: {features_queries.shape}")
    return features_queries
def copy_file(input_file, destination_dir, tmp_destination_dir):
    """
    Copy a given input_file from source to the destination directory.

    Steps:
    1. We use g_pathmgr to extract the data to a local path.
    2. We copy the file from the g_pathmgr cached local path to the
       destination directory with rsync. The rsync exit status is not
       checked.

    How destination dir is chosen:
    a) If user is using slurm, we set destination_dir = slurm_dir
       (see get_slurm_dir).
    b) If the local path used by g_pathmgr is the same as the input_file
       path, and the destination directory is not specified, we set
       destination_dir = tmp_destination_dir.

    Returns:
        output_file (str): the new path of the file
        destination_dir (str): the destination dir that was actually used
    """
    import subprocess

    # we first extract the local path for the files. g_pathmgr
    # determines the local path itself and copies data there.
    logging.info(f"Copying {input_file} to local path...")
    out = g_pathmgr.get_local_path(input_file)
    output_dir = os.path.dirname(out)
    logging.info(f"File coped to: {out}")

    if (out == input_file) and not destination_dir:
        destination_dir = tmp_destination_dir
        logging.info(
            f"The file wasn't copied. Copying again to temp "
            f"destination directory: {destination_dir}"
        )

    # if the user wants to copy the files to a specific location,
    # we simply move the files from the g_pathmgr cached directory
    # to the user specified directory.
    # NOTE(review): get_slurm_dir is applied unconditionally and then once
    # more under SLURM; the second call looks redundant. Kept as is - confirm
    # against get_slurm_dir before simplifying.
    destination_dir = get_slurm_dir(destination_dir)
    if "SLURM_JOBID" in os.environ:
        destination_dir = get_slurm_dir(destination_dir)

    if destination_dir is None:
        return out, output_dir

    makedir(destination_dir)
    output_file = f"{destination_dir}/{os.path.basename(input_file)}"
    if g_pathmgr.exists(output_file):
        logging.info(f"File already copied: {output_file}")
        return output_file, destination_dir

    logging.info(f"Copying file: {input_file} to destination: {destination_dir}")
    stime = time.perf_counter()
    # Argument-list form: both paths reach rsync verbatim, so spaces or shell
    # metacharacters in them cannot split the arguments or inject commands.
    subprocess.run(
        ["rsync", "-a", "--progress", str(out), str(destination_dir)],
        check=False,
    )
    etime = time.perf_counter()
    logging.info(
        f"Copied file | time (sec): {round(etime - stime, 4)} "
        f"size: {get_file_size(output_file)}"
    )
    return output_file, destination_dir