def load_single_label_file(self, path: str):
    """
    Load the labels stored in a single numpy (.npy) file.

    When ``MMAP_MODE`` is enabled in the config for the current split, loading
    is attempted in this order and stops at the first success:
      1. memory-mapped, through a file handle opened with ``g_pathmgr``;
      2. memory-mapped, directly from ``path`` (only tried when step 1
         raised a ``ValueError``);
      3. fully loaded in memory, through ``g_pathmgr``.
    When ``MMAP_MODE`` is disabled only step 3 is performed.

    Args:
        path (str): path to the ``.npy`` labels file.

    Returns:
        Whatever ``np.load`` returns for the file.

    Raises:
        AssertionError: if ``path`` is not a file or does not end with "npy".
    """
    assert g_pathmgr.isfile(path), f"Path to labels {path} is not a file"
    assert path.endswith("npy"), "Please specify a numpy file for labels"

    if self.cfg["DATA"][self.split].MMAP_MODE:
        try:
            with g_pathmgr.open(path, "rb") as fopen:
                return np.load(fopen, allow_pickle=True, mmap_mode="r")
        except ValueError as e:
            logging.info(f"Could not mmap {path}: {e}. Trying without g_pathmgr")
            # This second attempt must have its own try block: an exception
            # raised inside an `except` handler is NOT caught by a sibling
            # `except Exception` clause of the same try statement.
            try:
                labels = np.load(path, allow_pickle=True, mmap_mode="r")
            except Exception:
                logging.info("Could not mmap without g_pathmgr. Trying without mmap")
            else:
                logging.info("Successfully loaded without g_pathmgr")
                return labels
        except Exception:
            logging.info("Could not mmap without g_pathmgr. Trying without mmap")

    # Last resort (or MMAP_MODE disabled): regular in-memory load.
    with g_pathmgr.open(path, "rb") as fopen:
        return np.load(fopen, allow_pickle=True)
def _read_video_paths_and_labels(
    label_name_file: str,
    video_label_file: str,
    video_path_label_file: str,
    prefix: str = "",
) -> Tuple[List[str], List[int]]:
    """
    Collect, for every labelled video that has frames listed, its frame paths
    and its integer label.

    Args:
        label_name_file (str): JSON file mapping a label name (template with
            the square brackets removed) to its index.
            (e.g., '/path/to/folder/something-something-v2-labels.json')
        video_label_file (str): JSON file holding a list of entries, each with
            an "id" and a "template" field.
            (e.g., '/path/to/folder/something-something-v2-train.json')
        video_path_label_file (str): space separated csv with the header
            `original_vido_id video_id frame_id path labels`, one row per frame.
        prefix (str): prefix path joined in front of every frame path.

    Returns:
        image_paths (list): one list of frame paths per retained video.
        labels (list): the integer label of each retained video, in the same
            order as ``image_paths``.
    """
    # Group the frame paths by the video they belong to.
    frames_by_video = defaultdict(list)
    with g_pathmgr.open(video_path_label_file, "r") as f:
        for row in csv.DictReader(f, delimiter=" "):
            assert len(row) == 5
            video_id = row["original_vido_id"]
            frames_by_video[video_id].append(os.path.join(prefix, row["path"]))

    # Label name -> label index.
    with g_pathmgr.open(label_name_file, "r") as f:
        label_index_by_name = json.load(f)

    with g_pathmgr.open(video_label_file, "r") as f:
        video_entries = json.load(f)

    image_paths, labels = [], []
    for entry in video_entries:
        video_id = entry["id"]
        if video_id not in frames_by_video:
            # No frames were listed for this video: skip it.
            continue
        # Label names are the templates without their square brackets.
        label_name = entry["template"].replace("[", "").replace("]", "")
        label = int(label_index_by_name[label_name])
        image_paths.append(frames_by_video[video_id])
        labels.append(label)

    return image_paths, labels
def load_file(filename, mmap_mode=None, verbose=True, allow_pickle=False):
    """
    Common i/o utility to handle loading data from various file formats.
    Supported extensions: .txt, .pkl, .pickle, .npy, .json, .yaml, .csv

    For .npy files, ``mmap_mode`` (when set) is tried first through a
    ``g_pathmgr`` file handle; if that raises a ``ValueError`` it is tried
    again directly on ``filename``; if memory-mapping still fails the data is
    loaded without mmap.

    Args:
        filename: path of the file to load; the extension selects the parser.
        mmap_mode: forwarded to ``np.load`` for .npy files; falsy disables mmap.
        verbose (bool): log the file name before loading.
        allow_pickle (bool): forwarded to ``np.load`` for .npy files.

    Returns:
        The loaded data: list of lines for .txt, the parsed object for
        .pkl/.pickle/.json/.yaml, whatever ``np.load`` returns for .npy and
        the result of ``pd.read_csv`` for .csv.

    Raises:
        Exception: if the file extension is not supported.
    """
    if verbose:
        logging.info(f"Loading data from file: {filename}")

    file_ext = os.path.splitext(filename)[1]
    if file_ext == ".txt":
        with g_pathmgr.open(filename, "r") as fopen:
            data = fopen.readlines()
    elif file_ext in [".pkl", ".pickle"]:
        # NOTE(review): unpickling executes arbitrary code; only use on
        # trusted files.
        with g_pathmgr.open(filename, "rb") as fopen:
            data = pickle.load(fopen, encoding="latin1")
    elif file_ext == ".npy":
        data, loaded = None, False
        if mmap_mode:
            try:
                with g_pathmgr.open(filename, "rb") as fopen:
                    data = np.load(
                        fopen,
                        allow_pickle=allow_pickle,
                        encoding="latin1",
                        mmap_mode=mmap_mode,
                    )
                loaded = True
            except ValueError as e:
                logging.info(
                    f"Could not mmap {filename}: {e}. Trying without g_pathmgr"
                )
                # Needs its own try block: a failure inside this handler is
                # not caught by the sibling `except Exception` clause below.
                try:
                    data = np.load(
                        filename,
                        allow_pickle=allow_pickle,
                        encoding="latin1",
                        mmap_mode=mmap_mode,
                    )
                except Exception:
                    logging.info(
                        "Could not mmap without g_pathmgr. Trying without mmap"
                    )
                else:
                    loaded = True
                    logging.info("Successfully loaded without g_pathmgr")
            except Exception:
                logging.info("Could not mmap without g_pathmgr. Trying without mmap")
        if not loaded:
            # mmap disabled or every mmap attempt failed: plain load.
            with g_pathmgr.open(filename, "rb") as fopen:
                data = np.load(fopen, allow_pickle=allow_pickle, encoding="latin1")
    elif file_ext == ".json":
        with g_pathmgr.open(filename, "r") as fopen:
            data = json.load(fopen)
    elif file_ext == ".yaml":
        # NOTE(review): FullLoader is not meant for untrusted input; PyYAML
        # recommends safe_load for that case.
        with g_pathmgr.open(filename, "r") as fopen:
            data = yaml.load(fopen, Loader=yaml.FullLoader)
    elif file_ext == ".csv":
        with g_pathmgr.open(filename, "r") as fopen:
            data = pd.read_csv(fopen)
    else:
        raise Exception(f"Reading from {file_ext} is not supported yet")
    return data
def _load_c2_weights(file_path):
    """
    Load weights from a file whose name ends with "pkl" or "npy".

    Args:
        file_path (str): path to the weights file.

    Returns:
        The weights read by ``_load_c2_pickled_weights`` for "pkl" files, or
        the content of the numpy file for "npy" files.

    Raises:
        ValueError: if ``file_path`` ends with neither "pkl" nor "npy"
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if file_path.endswith("pkl"):
        return _load_c2_pickled_weights(file_path)
    if file_path.endswith("npy"):
        with g_pathmgr.open(file_path, "rb") as fopen:
            # Indexing with `[()]` extracts the stored object when the file
            # holds a 0-d array (e.g. a pickled Python object).
            return np.load(fopen, allow_pickle=True, encoding="latin1")[()]
    raise ValueError(
        f"Unsupported weights file (expected a pkl or npy file): {file_path}"
    )
def _cached_log_stream(filename):
    """
    Open ``filename`` in append mode and register the stream to be closed at
    interpreter exit.

    Returns:
        The opened stream.
    """
    # Use 1K buffer if writing to cloud storage (detected by "://" in the
    # name); otherwise keep the default buffering.
    buffer_size = 1024 if "://" in filename else -1
    stream = g_pathmgr.open(filename, "a", buffering=buffer_size)
    atexit.register(stream.close)
    return stream
def train_cls(self, features, targets, cls_num):
    """
    Train one linear SVM per cost value for the class ``cls_num``.

    For every cost in ``self.costs_list`` the cross-validation average
    precision is computed, its mean is stored in ``self.train_ap_matrix`` and
    saved to disk, then the SVM is fitted on all the data and pickled. A cost
    whose model and AP files both already exist is skipped unless
    ``self.config.force_retrain`` is set.

    Args:
        features: input features, passed as-is to scikit-learn.
        targets: 2D array of targets; column ``cls_num`` holds the labels of
            the class being trained.
        cls_num (int): index of the class to train.
    """
    logging.info(f"Training cls: {cls_num}")
    for cost_idx, cost in enumerate(self.costs_list):
        out_file, ap_out_file = self._get_svm_model_filename(cls_num, cost)
        outputs_exist = g_pathmgr.exists(out_file) and g_pathmgr.exists(ap_out_file)
        if outputs_exist and not self.config.force_retrain:
            logging.info(f"SVM model exists: {out_file}")
            logging.info(f"AP file exists: {ap_out_file}")
            continue

        logging.info(f"Training model with the cost: {cost} cls: {cls_num}")
        clf = LinearSVC(
            C=cost,
            class_weight={1: 2, -1: 1},
            intercept_scaling=1.0,
            verbose=1,
            penalty=self.config["penalty"],
            loss=self.config["loss"],
            tol=0.0001,
            dual=self.config["dual"],
            max_iter=self.config["max_iter"],
        )

        cls_labels = targets[:, cls_num].astype(dtype=np.int32, copy=True)
        # meaning of labels in VOC/COCO original loaded target files:
        # label 0 = not present, set it to -1 as svm train target
        # label 1 = present. Make the svm train target labels as -1, 1.
        cls_labels[cls_labels == 0] = -1
        num_positives = int(np.count_nonzero(cls_labels == 1))
        num_negatives = len(cls_labels) - num_positives
        logging.info(
            f"cls: {cls_num} has +ve: {num_positives} -ve: {num_negatives} "
            f"ratio: {float(num_positives) / num_negatives} "
            f"features: {features.shape} cls_labels: {cls_labels.shape}"
        )

        ap_scores = cross_val_score(
            clf,
            features,
            cls_labels,
            cv=self.config["cross_val_folds"],
            scoring="average_precision",
        )
        mean_ap = ap_scores.mean()
        self.train_ap_matrix[cls_num][cost_idx] = mean_ap

        # The saved model is the one fitted on the full data.
        clf.fit(features, cls_labels)
        logging.info(f"cls: {cls_num} cost: {cost} AP: {ap_scores} " f"mean:{mean_ap}")

        logging.info(f"Saving cls cost AP to: {ap_out_file}")
        save_file(np.array([mean_ap]), ap_out_file)

        logging.info(f"Saving SVM model to: {out_file}")
        with g_pathmgr.open(out_file, "wb") as fwrite:
            pickle.dump(clf, fwrite)
def get_coco_imgs_labels_info(split, data_source_dir, args):
    """
    Build the image path list and the multi-hot label list of a COCO 2014
    split and save both as .npy files in the output directory.

    Args:
        split (str): split name; "train" selects the `train2014` image folder,
            anything else selects `val2014`.
        data_source_dir (str): root folder containing `annotations/` and the
            image folders.
        args: unused here; kept for interface compatibility.

    Returns:
        list: `[img_info_out_path, label_info_out_path]`, the paths of the
        saved image-path and label arrays.

    Raises:
        AssertionError: if the annotations file does not exist.
    """
    # pycocotools is an optional dependency for VISSL
    from pycocotools.coco import COCO

    json_file = f"{data_source_dir}/annotations/instances_{split}2014.json"
    assert g_pathmgr.exists(json_file), "Annotations file does not exist. Abort"
    # Use a context manager so the annotations file handle is always closed.
    with g_pathmgr.open(json_file, "r") as fopen:
        json_data = json.load(fopen)
    image_index = [x["id"] for x in json_data["images"]]
    coco = COCO(json_file)

    num_cats = len(json_data["categories"])
    logging.info(
        "partition: {} num_cats: {} num_images: {}".format(
            split, num_cats, len(image_index)
        )
    )
    # Map the COCO category ids to contiguous indices [0, num_cats).
    cat_ids = [x["id"] for x in json_data["categories"]]
    coco_to_me = {val: ind for ind, val in enumerate(cat_ids)}
    cat_names = [str(x["name"]) for x in json_data["categories"]]
    cat_id_to_name = dict(enumerate(cat_names))
    class_ids = cat_id_to_name.keys()
    assert len(list(class_ids)) == num_cats
    assert min(class_ids) == 0
    assert max(class_ids) == len(class_ids) - 1
    assert len(set(class_ids)) == len(class_ids)

    img_labels_map = {}
    num_classes = len(cat_names)
    for im_id in image_index:
        ann_ids = coco.getAnnIds(imgIds=im_id)
        entry = coco.imgs[im_id]
        img_name = entry["file_name"]
        objs = coco.loadAnns(ann_ids)
        valid_objs = get_valid_objs(entry, objs)
        if img_name not in img_labels_map:
            img_labels_map[img_name] = np.zeros(num_classes, dtype=np.int32)
        for obj in valid_objs:
            # label = 1 (present), 0 (not present)
            img_labels_map[img_name][coco_to_me[obj["category_id"]]] = 1

    train_imgs_path = f"{data_source_dir}/train2014"
    val_imgs_path = f"{data_source_dir}/val2014"
    prefix = train_imgs_path if split == "train" else val_imgs_path
    img_paths, img_labels = [], []
    for item in sorted(img_labels_map.keys()):
        img_paths.append(f"{prefix}/{item}")
        img_labels.append(img_labels_map[item])

    # save to the datasets folder and return the path
    output_dir = get_output_dir()
    img_info_out_path = f"{output_dir}/{split}_images.npy"
    label_info_out_path = f"{output_dir}/{split}_labels.npy"
    save_file(np.array(img_paths), img_info_out_path)
    save_file(np.array(img_labels), label_info_out_path)
    return [img_info_out_path, label_info_out_path]
def _init_weight_from_slice(
    cls,
    weight_path: str,
    weight: torch.Tensor,
    slice_state_dict: Dict[str, str],
    strict: bool = True,
):
    """
    Copy into ``weight`` the values stored in the checkpoint slice that
    ``slice_state_dict`` associates with ``weight_path``.

    Args:
        weight_path (str): name of the weight; cleaned with
            ``cls._clean_path`` before the lookup.
        weight (torch.Tensor): destination tensor, modified in place.
        slice_state_dict (Dict[str, str]): maps cleaned weight names to the
            file holding the corresponding slice.
        strict (bool): when True, a missing entry raises instead of being
            only logged.

    Raises:
        ValueError: if the weight is not found and ``strict`` is True.
    """
    weight_path = cls._clean_path(weight_path)
    slice_file = slice_state_dict.get(weight_path)

    if slice_file is not None:
        logging.info(f"Loading weights: {weight_path}")
        with g_pathmgr.open(slice_file, "rb") as f:
            layer_checkpoint = torch.load(f)
        # The file must hold a checkpoint item of the "slice" kind.
        assert layer_checkpoint["type"] == CheckpointItemType.slice.name
        weight.copy_(layer_checkpoint["weight"])
        logging.info(f"Loaded parameters '{weight_path}' from: {slice_file}")
        return

    # No slice registered for this weight.
    logging.info(f"Could not find weights: {weight_path}")
    if strict:
        raise ValueError(
            f"Could not find weights: {weight_path} among:\n{slice_state_dict.keys()}"
        )
def retry_load_images(image_paths, retry=10, backend="pytorch"):
    """
    Load and decode a list of images, retrying the whole batch when at least
    one image fails to decode.

    Args:
        image_paths (list): paths of images needed to be loaded.
        retry (int, optional): maximum number of loading attempts.
            Defaults to 10.
        backend (str): `pytorch` or `cv2`. With `pytorch` the decoded images
            are stacked into a single tensor, otherwise the list of decoded
            arrays is returned.

    Returns:
        imgs (list or torch.Tensor): the loaded images.

    Raises:
        Exception: if the images could not all be decoded within ``retry``
            attempts (including ``retry <= 0``, which previously returned
            ``None`` silently).
    """
    for attempt in range(retry):
        imgs = []
        for image_path in image_paths:
            with g_pathmgr.open(image_path, "rb") as f:
                img_str = np.frombuffer(f.read(), np.uint8)
                img = cv2.imdecode(img_str, flags=cv2.IMREAD_COLOR)
            imgs.append(img)

        # cv2.imdecode signals a decoding failure by returning None.
        if all(img is not None for img in imgs):
            if backend == "pytorch":
                imgs = torch.as_tensor(np.stack(imgs))
            return imgs

        # Only wait when another attempt is actually going to happen.
        if attempt < retry - 1:
            logger.warning("Reading failed. Will retry.")
            time.sleep(1.0)

    raise Exception("Failed to load images {}".format(image_paths))
def read_csv(csv_file, class_whitelist=None, load_score=False):
    """Loads boxes and class labels from a CSV file in the AVA format.

    CSV file format described at https://research.google.com/ava/download.html.

    Args:
        csv_file: Path of the CSV file, opened through `g_pathmgr`.
        class_whitelist: If provided, boxes corresponding to (integer) class
            labels not in this set are skipped.
        load_score: If True, the score is read from the 8th column of the
            rows that have one.

    Returns:
        boxes: A dictionary mapping each unique image key (string) to a list
            of boxes, given as coordinates [y1, x1, y2, x2].
        labels: A dictionary mapping each unique image key (string) to a list
            of integer class labels, matching the corresponding box in
            `boxes`.
        scores: A dictionary mapping each unique image key (string) to a list
            of score values, matching the corresponding label in `labels`.
            If scores are not provided in the csv, then they will default
            to 1.0.

    Raises:
        AssertionError: if a row does not have 7 or 8 columns.
    """
    boxes = defaultdict(list)
    labels = defaultdict(list)
    scores = defaultdict(list)
    with g_pathmgr.open(csv_file, "r") as f:
        for row in csv.reader(f):
            # `row` is a list: format it instead of concatenating it to a
            # str, which raised TypeError and hid the real error.
            assert len(row) in [7, 8], f"Wrong number of columns: {row}"
            image_key = make_image_key(row[0], row[1])
            x1, y1, x2, y2 = [float(n) for n in row[2:6]]
            action_id = int(row[6])
            if class_whitelist and action_id not in class_whitelist:
                continue
            # Default to 1.0 when the row carries no score column.
            score = float(row[7]) if load_score and len(row) == 8 else 1.0
            boxes[image_key].append([y1, x1, y2, x2])
            labels[image_key].append(action_id)
            scores[image_key].append(score)
    return boxes, labels, scores
def map_features_to_img_filepath(cls, image_paths: List[str], input_dir: str, split: str, layer: str):
    """
    Write the features of every sample to its own .npy file, named after the
    image the sample comes from.

    Files are saved under `{input_dir}/features_to_image/{split}/{layer}` as
    `<image file name without extension>.npy`, each feature being saved with
    an extra leading axis.

    Args:
        image_paths (List[str]): list of image paths. Obtained by
            dataset.get_image_paths()
        input_dir (str): input path where the features are dumped
        split (str): whether the features are train or test data features
        layer (str): the features correspond to what layer of the model

    Raises:
        ValueError: if no feature shard is found for this split and layer.
    """
    logging.info(f"Merging features: {split} {layer}")

    output_dir = f"{input_dir}/features_to_image/{split}/{layer}"
    makedir(output_dir)
    logging.info(f"Saving the mapped features to dir: {output_dir} ...")

    shard_paths = cls.get_shard_file_names(input_dir, split=split, layer=layer)
    if not shard_paths:
        raise ValueError(f"No features found for {split} {layer}")

    for shard_path in shard_paths:
        shard = cls.load_feature_shard(shard_path)
        for sample_idx in range(shard.num_samples):
            # The shard stores, for each sample, its index in `image_paths`.
            image_index = shard.indices[sample_idx]
            feature = shard.features[sample_idx]
            image_stem, _ = os.path.splitext(os.path.basename(image_paths[image_index]))
            feature_file = os.path.join(output_dir, image_stem + ".npy")
            with g_pathmgr.open(feature_file, "wb") as fopen:
                np.save(fopen, np.expand_dims(feature, axis=0))
def read_label_map(label_map_file: str) -> Tuple:
    """
    Read label map and class ids.

    The file is scanned line by line: a "name" line sets the current class
    name (the text between the first pair of double quotes) and a following
    "id" / "label_id" line binds the integer at its end to that name.

    Args:
        label_map_file (str): Path to a .pbtxt containing class id's and
            class names

    Returns:
        (tuple): A tuple of the following,
            label_map (dict): A dictionary mapping class id to the associated
                class names.
            class_ids (set): A set of integer unique class id's
    """
    label_map, class_ids = {}, set()
    current_name = ""
    with g_pathmgr.open(label_map_file, "r") as f:
        for line in f:
            if line.startswith(" name:"):
                current_name = line.split('"')[1]
                continue
            if not line.startswith((" id:", " label_id:")):
                continue
            # The id is the last space-separated token of the line.
            class_id = int(line.strip().split(" ")[-1])
            label_map[class_id] = current_name
            class_ids.add(class_id)
    return label_map, class_ids
def from_csv(cls, file_path: str) -> LabeledVideoPaths:
    """
    Factory function that creates a LabeledVideoPaths object by reading a file
    with the following format:
        <path> <integer_label>
        ...
        <path> <integer_label>

    Each line is split on its last run of whitespace. Lines holding a single
    token are treated as unlabeled paths and get the label -1. Blank lines
    are ignored.

    Args:
        file_path (str): The path to the file to be read.

    Raises:
        AssertionError: if the file does not exist or contains no entry.
    """
    assert g_pathmgr.exists(file_path), f"{file_path} not found."
    video_paths_and_label = []
    with g_pathmgr.open(file_path, "r") as f:
        for path_label in f.read().splitlines():
            line_split = path_label.rsplit(None, 1)
            if not line_split:
                # Blank (or whitespace-only) line: nothing to parse.
                continue

            # The video path file may not contain labels (e.g. for a test
            # split). We assume this is the case if only 1 path is found and
            # set the label to -1 if so.
            if len(line_split) == 1:
                video_path, label = line_split[0], -1
            else:
                video_path, label = line_split

            video_paths_and_label.append((video_path, int(label)))

    # `file_path` is no longer shadowed by the loop, so this message reports
    # the list file itself.
    assert (
        len(video_paths_and_label) > 0
    ), f"Failed to load dataset from {file_path}."
    return cls(video_paths_and_label)
def _load_image_from_path(image_path: str, num_retries: int = 10) -> Image:
    """
    Loads the given image path using PathManager and decodes it as an RGB image.

    Args:
        image_path (str): the path to the image.
        num_retries (int): number of times to retry image reading to handle
            transient error.

    Returns:
        A PIL Image built from the decoded RGB array.

    Raises:
        Exception: if the image could not be decoded after ``num_retries``
            attempts.
    """
    img_arr = None

    for i in range(num_retries):
        with g_pathmgr.open(image_path, "rb") as f:
            img_str = np.frombuffer(f.read(), np.uint8)
            img_bgr = cv2.imdecode(img_str, flags=cv2.IMREAD_COLOR)

        # imdecode returns None when decoding fails. This must be checked
        # BEFORE the color conversion, which raises on a None input and would
        # otherwise abort the retry loop on the first failure.
        if img_bgr is not None:
            img_arr = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
            break

        logging.warning(f"Reading attempt {i}/{num_retries} failed.")
        time.sleep(1e-6)

    if img_arr is None:
        raise Exception("Failed to load image from {}".format(image_path))

    pil_image = Image.fromarray(img_arr)
    return pil_image
def __init__(
    self,
    csv_file: str,
    skiprows=2,
    index_col="class_label",
    col_class_label="hex_id",
    col_latitute="latitude_mean",
    col_longitude="longitude_mean",
):
    """
    Load a partitioning from a CSV file.

    Required information in CSV:
        - class_indexes from 0 to n
        - respective class labels i.e. hexid
        - latitude and longitude

    Args:
        csv_file (str): path of the CSV file.
        skiprows: forwarded to ``pd.read_csv``.
        index_col: name of the column used as index (the class index).
        col_class_label: name of the column holding the class label.
        col_latitute: name of the latitude column.
        col_longitude: name of the longitude column.
    """
    self._col_class_label = col_class_label
    self._col_latitude = col_latitute
    self._col_longitude = col_longitude

    with g_pathmgr.open(csv_file, "r") as fopen:
        partitioning = pd.read_csv(fopen, index_col=index_col, skiprows=skiprows)
    self._df = partitioning.sort_index()
    self._nclasses = len(self._df.index)

    # map class label (hexid) to index
    class_labels = self._df[self._col_class_label].tolist()
    class_indices = list(self._df.index)
    self._label2index = {
        label: index for label, index in zip(class_labels, class_indices)
    }

    # The partitioning is named after the CSV file (without its extension).
    file_stem, _ = os.path.splitext(os.path.basename(csv_file))
    self.name = file_stem
    self.shortname = PARTITIONIG_MAP[self.name]
def _cached_log_stream(filename):
    """
    Open ``filename`` in append mode with a 10KB buffer and register the
    stream to be closed at interpreter exit.

    Returns:
        The opened stream.
    """
    # we tune the buffering value so that the logs are updated
    # frequently.
    buffer_size_bytes = 10 * 1024  # 10KB
    stream = g_pathmgr.open(filename, mode="a", buffering=buffer_size_bytes)
    atexit.register(stream.close)
    return stream
def open(
    path: str,
    mode: str = "r",
    buffering: int = -1,
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    newline: Optional[str] = None,
):
    """
    Open ``path`` with ``IOPathManager`` when it is available, and with
    Python's builtin ``open`` otherwise.

    Args:
        path (str): path of the file to open.
        mode, buffering, encoding, errors, newline: forwarded unchanged to
            the underlying ``open`` call.

    Returns:
        The file object returned by the underlying ``open`` call.
    """
    # Refer to the builtin explicitly: this function is itself named `open`,
    # so when it is defined at module scope the bare name would resolve to
    # this function and recurse forever.
    import builtins

    open_kwargs = {
        "mode": mode,
        "buffering": buffering,
        "encoding": encoding,
        "errors": errors,
        "newline": newline,
    }
    if IOPathManager:
        return IOPathManager.open(path=path, **open_kwargs)
    return builtins.open(path, **open_kwargs)
def save_checkpoint(path_to_job, model, optimizer, epoch, cfg):
    """
    Save a checkpoint (master process only).

    Args:
        path_to_job (string): job path from which the checkpoint directory
            and the checkpoint file path are derived.
        model (model): model to save the weight to the checkpoint.
        optimizer (optim): optimizer to save the historical state.
        epoch (int): current number of epoch of the model.
        cfg (CfgNode): configs to save.

    Returns:
        The path of the written checkpoint, or None when called from a
        non-master process.
    """
    # Save checkpoints only from the master process.
    is_master = du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS)
    if not is_master:
        return None

    # Ensure that the checkpoint dir exists.
    g_pathmgr.mkdirs(get_checkpoint_dir(path_to_job))

    # Omit the DDP wrapper in the multi-gpu setting.
    if cfg.NUM_GPUS > 1:
        state = model.module.state_dict()
    else:
        state = model.state_dict()

    # Record the state.
    checkpoint = {
        "epoch": epoch,
        "model_state": sub_to_normal_bn(state),
        "optimizer_state": optimizer.state_dict(),
        "cfg": cfg.dump(),
    }

    # Write the checkpoint; the file is numbered with the next epoch.
    path_to_checkpoint = get_path_to_checkpoint(path_to_job, epoch + 1)
    with g_pathmgr.open(path_to_checkpoint, "wb") as f:
        torch.save(checkpoint, f)
    return path_to_checkpoint
def load_cluster_assigment(cls, file_path: str) -> ClusterAssignment:
    """
    Read a ``ClusterAssignment`` from a file written with ``torch.save``.

    Args:
        file_path (str): path of the file to read.

    Returns:
        ClusterAssignment: built from the entries stored under
        ``cls._CONFIG_KEY`` and ``cls._ASSIGN_KEY``.
    """
    with g_pathmgr.open(file_path, "rb") as f:
        stored = torch.load(f)
    config = stored[cls._CONFIG_KEY]
    cluster_assignments = stored[cls._ASSIGN_KEY]
    return ClusterAssignment(config=config, cluster_assignments=cluster_assignments)
def _construct_loader(self):
    """
    Construct the video loader.

    Reads `<PATH_TO_DATA_DIR>/<mode>.csv`, where every line holds a video path
    and its label separated by `PATH_LABEL_SEPARATOR`, and registers each
    video `self._num_clips` times (once per spatial-temporal index).
    """
    path_to_file = os.path.join(self.cfg.DATA.PATH_TO_DATA_DIR, f"{self.mode}.csv")
    assert g_pathmgr.exists(path_to_file), f"{path_to_file} dir not found"

    self._path_to_videos = []
    self._labels = []
    self._spatial_temporal_idx = []
    with g_pathmgr.open(path_to_file, "r") as f:
        for clip_idx, path_label in enumerate(f.read().splitlines()):
            fields = path_label.split(self.cfg.DATA.PATH_LABEL_SEPARATOR)
            assert len(fields) == 2
            path, label = fields
            for idx in range(self._num_clips):
                self._path_to_videos.append(
                    os.path.join(self.cfg.DATA.PATH_PREFIX, path)
                )
                self._labels.append(int(label))
                self._spatial_temporal_idx.append(idx)
                # One (initially empty) metadata slot per registered clip.
                self._video_meta[clip_idx * self._num_clips + idx] = {}

    num_videos = len(self._path_to_videos)
    assert (
        num_videos > 0
    ), f"Failed to load Kinetics split {self._split_idx} from {path_to_file}"
    logger.info(
        f"Constructing kinetics dataloader (size: {num_videos}) from {path_to_file}"
    )
def create_sun397_disk_filelist_dataset(input_path: str, output_path: str, seed: int):
    """
    Create partitions "train", "trainval", "val", "test" from the input path
    of SUN397 and save, for each of them, the image paths and the labels as
    `{split}_images.npy` and `{split}_labels.npy` in ``output_path``.

    The samples of each class are distributed among the partitions by
    ``split_sample_list`` (70% "train", 10% "val" and 20% "test" according to
    the original documentation of this function).

    Args:
        input_path (str): folder containing the `SUN397` folder.
        output_path (str): folder where the .npy files are written.
        seed (int): seed of the `random` module, for reproducible splits.
    """
    random.seed(seed)
    g_pathmgr.mkdirs(output_path)

    # List all the available classes in SUN397 and their path
    image_folder = os.path.join(input_path, "SUN397")
    class_names_file = os.path.join(image_folder, "ClassName.txt")
    class_paths = []
    with open(class_names_file, "r") as f:
        for line in f:
            path = line.strip()
            # Class paths are listed with a leading "/" which would make
            # os.path.join discard the image folder.
            if path.startswith("/"):
                path = path[1:]
            class_paths.append(path)

    # For each label, split the samples in train/val/test and add them
    # to the list of samples associated to each split
    splits_data = {
        "train": SplitData(),
        "val": SplitData(),
        "test": SplitData(),
        "trainval": SplitData(),
    }
    for i, class_path in tqdm(enumerate(class_paths), total=len(class_paths)):
        full_class_path = os.path.join(image_folder, class_path)
        # os.listdir returns entries in arbitrary order: sort them so that a
        # given seed yields the same split on every filesystem.
        image_names = sorted(os.listdir(full_class_path))
        splits = split_sample_list(image_names)
        for split, images in splits.items():
            for image_name in images:
                image_path = os.path.join(full_class_path, image_name)
                splits_data[split].image_paths.append(image_path)
                splits_data[split].image_labels.append(i)

    # Save each split
    for split, samples in splits_data.items():
        image_output_path = os.path.join(output_path, f"{split}_images.npy")
        with g_pathmgr.open(image_output_path, mode="wb") as f:
            np.save(f, np.array(samples.image_paths))
        label_output_path = os.path.join(output_path, f"{split}_labels.npy")
        with g_pathmgr.open(label_output_path, mode="wb") as f:
            np.save(f, np.array(samples.image_labels))
def __init__(self):
    """
    Read the class names found at ``self.IMAGENET_TARGETS_URL`` (one per
    line) and number them in sorted order in ``self.label_to_id``.
    """
    with g_pathmgr.open(self.IMAGENET_TARGETS_URL) as f:
        sorted_labels = sorted(line.strip() for line in f.readlines())
    # The id of a label is its rank in the sorted list of labels.
    self.label_to_id = dict(zip(sorted_labels, range(len(sorted_labels))))
def _load_c2_pickled_weights(file_path):
    """
    Unpickle ``file_path`` and return the weights it contains.

    Returns:
        The value stored under the "blobs" key when the unpickled object has
        one, the unpickled object itself otherwise.
    """
    with g_pathmgr.open(file_path, "rb") as f:
        unpickled = pickle.load(f, encoding="latin1")
    return unpickled["blobs"] if "blobs" in unpickled else unpickled
def register_json(json_catalog_path):
    """
    Register every entry of a JSON catalog in ``VisslDatasetCatalog``.

    Args:
        json_catalog_path: a .json filepath that contains the data to be
            registered, as a mapping from key to value.
    """
    with g_pathmgr.open(json_catalog_path) as fopen:
        catalog_entries = json.load(fopen)
    for dataset_key, dataset_info in catalog_entries.items():
        VisslDatasetCatalog.register_data(dataset_key, dataset_info)
def test_factory_open(self) -> None:
    """
    Check that both the global path manager and a manager obtained from
    ``PathManagerFactory`` read back the contents of the temporary file.
    """
    with g_pathmgr.open(self._tmpfile, "r") as f:
        self.assertEqual(f.read(), self._tmpfile_contents)

    _pathmgr = PathManagerFactory.get("test_pm")
    try:
        with _pathmgr.open(self._tmpfile, "r") as f:
            self.assertEqual(f.read(), self._tmpfile_contents)
    finally:
        # Always unregister the manager, even when the assertion fails, so
        # that it does not leak into other tests.
        PathManagerFactory.remove("test_pm")
def load_checkpoint(checkpoint_file, model, optimizer=None):
    """
    Loads the checkpoint from the given file.

    The model state is loaded into the unwrapped model; the optimizer state
    is loaded only when an optimizer is given.

    Returns:
        The epoch stored in the checkpoint.
    """
    assert g_pathmgr.exists(checkpoint_file), f"Checkpoint '{checkpoint_file}' not found"
    with g_pathmgr.open(checkpoint_file, "rb") as f:
        checkpoint = torch.load(f, map_location="cpu")
    unwrap_model(model).load_state_dict(checkpoint["model_state"])
    if optimizer:
        optimizer.load_state_dict(checkpoint["optimizer_state"])
    return checkpoint["epoch"]
def save_cluster_assignment(cls, output_dir: str, assignments: ClusterAssignment):
    """
    Save ``assignments`` with ``torch.save`` to
    `cluster_assignments.torch` inside ``output_dir``.

    The config and the cluster assignments are stored under
    ``cls._CONFIG_KEY`` and ``cls._ASSIGN_KEY`` respectively.
    """
    destination = os.path.join(output_dir, "cluster_assignments.torch")
    with g_pathmgr.open(destination, "wb") as f:
        payload = {}
        payload[cls._CONFIG_KEY] = assignments.config
        payload[cls._ASSIGN_KEY] = assignments.cluster_assignments
        torch.save(payload, f)
def load_image(self, im_path):
    """
    Prepares the image for network input with format of CHW RGB float.

    The image is converted to RGB, scaled by 1/255 as float32 and its axes
    are permuted from H W C to C H W.
    """
    with g_pathmgr.open(im_path, "rb") as f, Image.open(f) as raw_image:
        rgb_image = raw_image.convert("RGB")
    scaled_pixels = np.array(rgb_image).astype(np.float32) / 255.0
    # H W C to C H W
    return torch.from_numpy(scaled_pixels).permute([2, 0, 1])
def get_images_labels_info(split, args):
    """
    Build the image paths and the per-class label vectors of a VOC split.

    Every data file (one per class, taken in sorted order) lists image names
    with a label. Labels are remapped from the VOC convention to the training
    convention: -1 (not present) becomes 0, 0 (ignore) becomes -1, and other
    values are kept. Classes never listed for an image stay at -1.

    Returns:
        img_paths (list): `{data_source_dir}/JPEGImages/{name}.jpg` for every
            image name, in sorted order.
        img_labels (list): the label vector of each image, in the same order.
        output_dict (dict): when ``args.generate_json`` is set, maps each
            image name to a `{class name: label}` dict; empty otherwise.
    """
    assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort"

    data_files = get_data_files(split, args)
    # VOC label -> training target; any other value is kept unchanged.
    voc_to_target = {-1: 0, 0: -1}
    # we will construct a map for image name to the vector of -1, 0, 1
    # we sort the data_files which gives sorted class names as well
    img_labels_map = {}
    for cls_num, data_path in enumerate(sorted(data_files)):
        with g_pathmgr.open(data_path, "r") as fopen:
            for line in fopen:
                # Malformed lines are logged and skipped.
                try:
                    img_name, raw_label = line.strip().split()
                    if img_name not in img_labels_map:
                        img_labels_map[img_name] = np.full(
                            len(data_files), -1, dtype=np.int32
                        )
                    voc_label = int(raw_label)
                    img_labels_map[img_name][cls_num] = voc_to_target.get(
                        voc_label, voc_label
                    )
                except Exception:
                    logger.info(f"Error processing: {line} data_path: {data_path}")

    sorted_names = sorted(img_labels_map.keys())
    img_paths = [
        f"{args.data_source_dir}/JPEGImages/{name}.jpg" for name in sorted_names
    ]
    img_labels = [img_labels_map[name] for name in sorted_names]

    output_dict = {}
    if args.generate_json:
        # The class name is the part of the file name before the first "_".
        cls_names = [
            item.split("/")[-1].split(".")[0].split("_")[0]
            for item in sorted(data_files)
        ]
        for img_id in sorted_names:
            labels = img_labels_map[img_id]
            output_dict[img_id] = {
                cls_name: int(labels[cls_idx])
                for cls_idx, cls_name in enumerate(cls_names)
            }
    return img_paths, img_labels, output_dict
def parse_bboxes_file(ann_filenames, ann_is_gt_box, detect_thresh, boxes_sample_rate=1):
    """
    Parse AVA bounding boxes files.

    Args:
        ann_filenames (list of str(s)): a list of AVA bounding boxes
            annotation files.
        ann_is_gt_box (list of bools): a list of boolean to indicate whether
            the corresponding ann_file is ground-truth.
            `ann_is_gt_box[i]` correspond to `ann_filenames[i]`.
        detect_thresh (float): threshold for accepting predicted boxes,
            range [0, 1].
        boxes_sample_rate (int): sample rate for test bounding boxes. Get 1
            every `boxes_sample_rate`.

    Returns:
        all_boxes (dict): maps a video name to a dict that maps each frame
            second to a list of `[box, labels]` pairs.
        count (int): number of (box, label) entries whose label is not -1.
        unique_box_count (int): number of distinct boxes.
    """
    all_boxes = {}
    count = 0
    unique_box_count = 0
    for filename, is_gt_box in zip(ann_filenames, ann_is_gt_box):
        with g_pathmgr.open(filename, "r") as f:
            for line in f:
                row = line.strip().split(",")
                # When we use predicted boxes to train/eval, we need to
                # ignore the boxes whose scores are below the threshold.
                if not is_gt_box and float(row[7]) < detect_thresh:
                    continue

                video_name, frame_sec = row[0], int(row[1])
                if frame_sec % boxes_sample_rate != 0:
                    continue

                # Box with format [x1, y1, x2, y2] with a range of [0, 1] as float.
                coords = row[2:6]
                box_key = ",".join(coords)
                box = [float(coord) for coord in coords]
                # An empty label column is recorded as -1.
                label = int(row[6]) if row[6] != "" else -1

                if video_name not in all_boxes:
                    all_boxes[video_name] = {sec: {} for sec in AVA_VALID_FRAMES}

                frame_boxes = all_boxes[video_name][frame_sec]
                if box_key not in frame_boxes:
                    frame_boxes[box_key] = [box, []]
                    unique_box_count += 1

                frame_boxes[box_key][1].append(label)
                if label != -1:
                    count += 1

    for frames in all_boxes.values():
        for frame_sec in frames:
            # Save in format of a list of [box_i, box_i_labels].
            frames[frame_sec] = list(frames[frame_sec].values())

    return all_boxes, count, unique_box_count