def load_labeled_data(
    data: Union[str, Iterable[Union[str, numpy.ndarray, Dict[str, numpy.ndarray]]]],
    labels: Union[
        None, str, Iterable[Union[str, numpy.ndarray, Dict[str, numpy.ndarray]]]
    ],
    raise_on_error: bool = True,
) -> List[
    Tuple[
        Union[numpy.ndarray, Dict[str, numpy.ndarray]],
        Union[None, numpy.ndarray, Dict[str, numpy.ndarray]],
    ]
]:
    """
    Load labels and data from disk or from memory and group them together.
    Assumes sorted ordering for on disk. Will match between when a file glob is passed
    for either data and/or labels.

    :param data: the file glob, file path to numpy data tar ball, or iterable of
        arrays (or paths to arrays) to use for data; one-shot iterables such as
        generators are accepted and are consumed
    :param labels: the file glob, file path to numpy data tar ball, or iterable of
        arrays (or paths to arrays) to use for labels, if any
    :param raise_on_error: True to raise on any error that occurs while loading an
        individual data/label pair; False to log the error, skip that pair,
        and continue
    :return: a list containing tuples of the data, labels. If labels was passed in
        as None, will now contain a None for the second index in each tuple
    :raises ValueError: if labels is non-empty and its length differs from the
        length of data (raised regardless of raise_on_error)
    """
    if isinstance(data, str):
        data = load_numpy_list(data)
    elif not hasattr(data, "__len__"):
        # generators and other one-shot iterables have no len(); materialize them
        # so the length comparison below works
        data = list(data)

    if labels is None:
        labels = [None] * len(data)
    elif isinstance(labels, str):
        labels = load_numpy_list(labels)
    elif not hasattr(labels, "__len__"):
        labels = list(labels)

    # An empty labels collection is exempt from the length check; the zip below
    # then simply yields nothing. An explicit length test is used instead of
    # truthiness because bool() of a multi-element numpy array is ambiguous.
    if len(labels) > 0 and len(data) != len(labels):
        # always raise this error, lengths must match
        raise ValueError(
            "len(data) given of {} does not match len(labels) given of {}".format(
                len(data), len(labels)
            )
        )

    labeled_data = []

    for dat, lab in zip(data, labels):
        try:
            # string entries are paths that still need to be read from disk
            if isinstance(dat, str):
                dat = load_numpy(dat)

            if isinstance(lab, str):
                lab = load_numpy(lab)

            labeled_data.append((dat, lab))
        except Exception as err:
            if raise_on_error:
                # bare raise keeps the original traceback intact
                raise
            _LOGGER.error("Error creating labeled data: {}".format(err))

    return labeled_data
def load_images(
    dataset_path: Optional[str], image_size: Tuple[int]
) -> Tuple[List[numpy.ndarray], List[numpy.ndarray]]:
    """
    Gather images from a local path (or the SparseZoo when no path is given),
    shuffle them, and run each one through load_image.

    :param dataset_path: optional path to image files to load; may be a glob
        pattern containing "*", a directory, or a single file. If None (or empty),
        images are loaded from the SparseZoo
    :param image_size: size to resize images to, forwarded to load_image
    :return: List of loaded images resized and transposed to given size and list
        of non resized images, both in the same (shuffled) order
    :raises Exception: if a path is given that is not a glob pattern, a directory,
        or an existing file
    """
    if dataset_path:
        location = str(Path(dataset_path).absolute())
    else:
        location = None

    if location is None:
        # nothing supplied locally: pull the sample originals from the SparseZoo
        zoo_model = zoo_yolo_v3()
        sources = load_numpy_list(zoo_model.data_originals.downloaded_path())
        # each entry is an npz-style dict; unwrap it using the first entry's
        # first key
        first_key = list(sources[0].keys())[0]
        sources = [entry[first_key] for entry in sources]
    elif "*" in location:
        # local file(s), adapted from yolov5/utils/datasets.py
        matches = glob.glob(location, recursive=True)  # glob
        sources = sorted(matches)
    elif os.path.isdir(location):
        pattern = os.path.join(location, "*.*")
        sources = sorted(glob.glob(pattern))  # dir
    elif os.path.isfile(location):
        sources = [location]  # files
    else:
        raise Exception(f"ERROR: {location} does not exist")

    # in-place shuffle so the returned order is randomized
    numpy.random.shuffle(sources)

    model_images, original_images = [], []
    for source in sources:
        resized, untouched = load_image(source, image_size)
        model_images.append(resized)
        original_images.append(untouched)

    return model_images, original_images
def load_data(data_path: str) -> List[List[numpy.ndarray]]:
    """
    Loads data from given sparseZoo stub or directory with .npz files

    :param data_path: directory path to .npz files to load or SparseZoo stub
        (a string starting with "zoo:")
    :return: List of loaded npz files, each converted to the list of the
        arrays it contains
    :raises RuntimeError: if any entry in the data directory does not end with
        the .npz extension
    """
    if data_path.startswith("zoo:"):
        data_dir = Zoo.load_model_from_stub(data_path).data_inputs.downloaded_path()
    else:
        data_dir = data_path

    # Compare the actual suffix: a substring test would let names such as
    # "sample.npz.bak" through even though they are not .npz files.
    invalid_files = [
        name for name in os.listdir(data_dir) if not name.endswith(".npz")
    ]
    if invalid_files:
        raise RuntimeError(
            f"All files in data directory {data_dir} must have a .npz extension "
            f"found {invalid_files}"
        )

    samples = load_numpy_list(data_dir)

    # unwrap unloaded numpy files: entries that are still path strings are loaded
    samples = [
        load_numpy(sample) if isinstance(sample, str) else sample
        for sample in samples
    ]

    # each sample is a mapping of arrays; keep only the arrays, in mapping order
    return [list(sample.values()) for sample in samples]
def _load_data(args, input_names) -> List[List[numpy.ndarray]]: if args.data_path.startswith("zoo:"): data_dir = Zoo.load_model_from_stub( args.data_path).data_inputs.downloaded_path() else: data_dir = args.data_path data_files = os.listdir(data_dir) if any(".npz" not in file_name for file_name in data_files): raise RuntimeError( f"All files in data directory {data_dir} must have a .npz extension " f"found {[name for name in data_files if '.npz' not in name]}") samples = load_numpy_list(data_dir) # unwrap unloaded numpy files samples = [ load_numpy(sample) if isinstance(sample, str) else sample for sample in samples ] processed_samples = [] warning_given = False for sample in samples: if not all(inp_name in sample for inp_name in input_names) or len(input_names) != len(sample): if not warning_given: warnings.warn( "input sample found whose input names do not match the model input " "names, this may cause an exception during benchmarking") warning_given = True sample = list(sample.values()) else: sample = [sample[inp_name] for inp_name in input_names] for idx, array in enumerate(sample): processed_array = numpy.zeros( [args.max_sequence_length, *array.shape[1:]], dtype=array.dtype, ) if array.shape[0] < args.max_sequence_length: processed_array[:array.shape[0], ...] = array else: processed_array[:, ...] = array[:args.max_sequence_length, ...] sample[idx] = processed_array processed_samples.append(sample) return processed_samples