Example #1
0
def load_labeled_data(
    data: Union[str, Iterable[Union[str, numpy.ndarray, Dict[str,
                                                             numpy.ndarray]]]],
    labels: Union[None, str, Iterable[Union[str, numpy.ndarray,
                                            Dict[str, numpy.ndarray]]]],
    raise_on_error: bool = True,
) -> List[Tuple[Union[numpy.ndarray, Dict[str, numpy.ndarray]], Union[
        None, numpy.ndarray, Dict[str, numpy.ndarray]], ]]:
    """
    Load labels and data from disk or from memory and group them together.
    Assumes sorted ordering for on disk. Will match between when a file glob is passed
    for either data and/or labels.

    :param data: the file glob, file path to numpy data tar ball, or iterable of
        arrays / file paths to use for data. One-shot iterables such as generators
        are accepted and are consumed into a list
    :param labels: the file glob, file path to numpy data tar ball, or iterable of
        arrays / file paths to use for labels, if any. One-shot iterables such as
        generators are accepted and are consumed into a list
    :param raise_on_error: True to re-raise any error that occurs while loading an
        individual (data, label) pair; False to log the error, skip that pair,
        and continue
    :return: a list containing tuples of the data, labels. If labels was passed in
        as None, will now contain a None for the second index in each tuple
    :raises ValueError: if labels is non-empty and its length differs from the
        length of data; raised regardless of raise_on_error
    """
    if isinstance(data, str):
        data = load_numpy_list(data)
    elif not hasattr(data, "__len__"):
        # generators / iterators satisfy the Iterable annotation but cannot be
        # measured with len(), so materialize them before the length check
        data = list(data)

    if labels is None:
        labels = [None] * len(data)
    elif isinstance(labels, str):
        labels = load_numpy_list(labels)
    elif not hasattr(labels, "__len__"):
        labels = list(labels)

    # An explicit length test is used instead of truthiness so array-like
    # containers do not trigger an ambiguous truth value error.
    # NOTE: an empty labels collection skips this check (existing behavior);
    # the zip below then yields no pairs and an empty list is returned.
    if len(labels) > 0 and len(data) != len(labels):
        # always raise this error, lengths must match
        raise ValueError(
            "len(data) given of {} does not match len(labels) given of {}".
            format(len(data), len(labels)))

    labeled_data = []

    for dat, lab in zip(data, labels):
        try:
            # string entries are file paths that still need to be read from disk
            if isinstance(dat, str):
                dat = load_numpy(dat)

            if isinstance(lab, str):
                lab = load_numpy(lab)

            labeled_data.append((dat, lab))
        except Exception as err:
            if raise_on_error:
                # bare raise keeps the original traceback intact
                raise
            _LOGGER.error("Error creating labeled data: %s", err)

    return labeled_data
Example #2
0
def load_images(
        dataset_path: Optional[str], image_size: Tuple[int]
) -> Tuple[List[numpy.ndarray], List[numpy.ndarray]]:
    """
    Gather image sources from a local path or the SparseZoo, shuffle them, and
    pass each one through load_image.

    :param dataset_path: optional path to image files to load (a glob pattern
        containing "*", a directory, or a single file); if None or empty, images
        are loaded from the SparseZoo
    :param image_size: size to resize images to, forwarded to load_image
    :return: tuple of two lists built from the pairs returned by load_image:
        the model-ready images and the original (non resized) images, both in
        the same shuffled order
    :raises Exception: if dataset_path is given but is neither a glob pattern,
        a directory, nor an existing file
    """
    if not dataset_path:
        # no local path given: fall back to the sample images from the SparseZoo
        originals_path = zoo_yolo_v3().data_originals.downloaded_path()
        npz_samples = load_numpy_list(originals_path)
        # each sample is a dict-like npz wrapper; unwrap using the first key
        # of the first sample
        first_key = list(npz_samples[0].keys())[0]
        images = [sample[first_key] for sample in npz_samples]
    else:
        path = str(Path(dataset_path).absolute())
        # local file handling adapted from yolov5/utils/datasets.py
        if "*" in path:
            images = sorted(glob.glob(path, recursive=True))
        elif os.path.isdir(path):
            images = sorted(glob.glob(os.path.join(path, "*.*")))
        elif os.path.isfile(path):
            images = [path]
        else:
            raise Exception(f"ERROR: {path} does not exist")

    # in-place shuffle, so the returned order is random
    numpy.random.shuffle(images)

    model_images, original_images = [], []
    # the generator keeps loading lazy: one image is loaded, then stored
    for resized, untouched in (load_image(src, image_size) for src in images):
        model_images.append(resized)
        original_images.append(untouched)
    return model_images, original_images
Example #3
0
def load_data(data_path: str, ) -> List[List[numpy.ndarray]]:
    """
    Loads data from given sparseZoo stub or directory with .npz files

    :param data_path: directory path to .npz files to load or SparseZoo stub
        (a string starting with "zoo:")
    :return: List of loaded npz files, each converted to a list of its values
    :raises RuntimeError: if data_path is a local directory containing any entry
        whose name does not end with .npz
    """
    if data_path.startswith("zoo:"):
        data_dir = Zoo.load_model_from_stub(
            data_path).data_inputs.downloaded_path()
    else:
        data_dir = data_path
        # compare the actual suffix; a substring test would wrongly accept
        # names such as "sample.npz.bak"
        invalid_files = [
            name for name in os.listdir(data_dir) if not name.endswith(".npz")
        ]
        if invalid_files:
            raise RuntimeError(
                f"All files in data directory {data_dir} must have a .npz extension "
                f"found {invalid_files}")

    processed_samples = []
    for sample in load_numpy_list(data_dir):
        # entries that are still file paths have not been loaded yet
        if isinstance(sample, str):
            sample = load_numpy(sample)
        processed_samples.append(list(sample.values()))

    return processed_samples
Example #4
0
def _load_data(args, input_names) -> List[List[numpy.ndarray]]:
    """
    Load benchmark samples and pad or truncate every array along its first axis
    to args.max_sequence_length.

    :param args: object providing data_path (directory of .npz files or a
        SparseZoo stub starting with "zoo:") and max_sequence_length
    :param input_names: the model input names; when a sample contains exactly
        these names its arrays are ordered to match, otherwise the sample's
        values are used in their stored order and a warning is emitted once
    :return: one list of arrays per sample, each array having
        args.max_sequence_length entries along its first axis
    :raises RuntimeError: if args.data_path is a local directory containing any
        entry whose name does not end with .npz
    """
    if args.data_path.startswith("zoo:"):
        data_dir = Zoo.load_model_from_stub(
            args.data_path).data_inputs.downloaded_path()
    else:
        data_dir = args.data_path
        # compare the actual suffix; a substring test would wrongly accept
        # names such as "sample.npz.bak"
        invalid_files = [
            name for name in os.listdir(data_dir) if not name.endswith(".npz")
        ]
        if invalid_files:
            raise RuntimeError(
                f"All files in data directory {data_dir} must have a .npz extension "
                f"found {invalid_files}")

    samples = load_numpy_list(data_dir)

    # unwrap unloaded numpy files
    samples = [
        load_numpy(sample) if isinstance(sample, str) else sample
        for sample in samples
    ]

    max_length = args.max_sequence_length
    processed_samples = []
    warning_given = False
    for sample in samples:
        names_match = (
            all(inp_name in sample for inp_name in input_names)
            and len(input_names) == len(sample)
        )
        if names_match:
            # order the arrays the same way as the model inputs
            sample = [sample[inp_name] for inp_name in input_names]
        else:
            if not warning_given:
                warnings.warn(
                    "input sample found whose input names do not match the model input "
                    "names, this may cause an exception during benchmarking")
                warning_given = True
            sample = list(sample.values())

        padded_sample = []
        for array in sample:
            padded = numpy.zeros(
                [max_length, *array.shape[1:]],
                dtype=array.dtype,
            )
            # clamping the row count handles both cases at once: shorter arrays
            # are zero padded, longer arrays are truncated
            rows = min(array.shape[0], max_length)
            padded[:rows, ...] = array[:rows, ...]
            padded_sample.append(padded)
        processed_samples.append(padded_sample)
    return processed_samples