def test_magic_number_parsing(file_name, file_path, true_metadata):
    """Check that the metadata read from one file equals the expected value.

    Args:
        file_name: Name of the archive handed to ``gunzip_to_dir``.
        file_path: Directory handed to ``gunzip_to_dir`` alongside the name.
        true_metadata: Value the parsed metadata must compare equal to.

    NOTE(review): the arguments are presumably injected by a pytest
    parametrization or fixture defined outside this view -- confirm.
    """
    # The scratch directory (and whatever gunzip_to_dir placed in it) is
    # removed when the ``with`` block exits, so parsing happens inside it.
    with tempfile.TemporaryDirectory() as scratch_dir:
        unpacked_path = gunzip_to_dir(file_name, file_path, scratch_dir)
        parsed_metadata = rd.get_metadata(unpacked_path)

    assert parsed_metadata == true_metadata
    def __init__(self, path_to_image_store, path_to_labels, transform=None):
        """Load images and labels from disk and keep them as tensors.

        Args:
            path_to_image_store: Path passed to ``rd.get_metadata`` and
                ``rd.get_data`` to read the image data.
            path_to_labels: Path passed to the same readers for the labels.
            transform: Stored unchanged on ``self.transform``; it is not
                applied anywhere in this constructor.

        Raises:
            MismatchedDataError: If the first dimension of the image tensor
                differs from the first dimension of the label tensor.
        """
        # Read each file's metadata first; get_data needs it to decode.
        image_meta = rd.get_metadata(path_to_image_store)
        label_meta = rd.get_metadata(path_to_labels)

        raw_images = rd.get_data(path_to_image_store, image_meta)
        raw_labels = rd.get_data(path_to_labels, label_meta)

        # Images become float tensors with an extra size-1 dimension inserted
        # at index 1 (presumably a channel axis -- confirm against the
        # consumer); labels become long tensors.
        image_tensor = torch.tensor(raw_images, dtype=torch.float)
        self.image_data = image_tensor.unsqueeze(1)
        self.label_data = torch.tensor(raw_labels, dtype=torch.long)

        # Every image needs exactly one label.
        image_count = self.image_data.size(0)
        label_count = self.label_data.size(0)
        if image_count != label_count:
            raise MismatchedDataError(
                "len(Images) != len(labels). Check loaded data!")

        self.transform = transform
def process_idx_file(ip_file_name, ip_file_path, is_label=False):
    """Read one archive via ``gunzip_to_dir``/``rd`` and return it as a tensor.

    Args:
        ip_file_name: Name of the input archive handed to ``gunzip_to_dir``.
        ip_file_path: Directory handed to ``gunzip_to_dir`` with the name.
        is_label: When truthy the tensor is built with ``torch.long``;
            otherwise with ``torch.float``.

    Returns:
        A ``torch.Tensor`` built from the data returned by ``rd.get_data``.
    """
    # The data is read while the scratch directory still exists; it is
    # deleted as soon as the ``with`` block exits.
    with tempfile.TemporaryDirectory() as scratch_dir:
        unpacked_path = gunzip_to_dir(ip_file_name, ip_file_path, scratch_dir)
        header = rd.get_metadata(unpacked_path)
        payload = rd.get_data(unpacked_path, header)

    target_dtype = torch.long if is_label else torch.float
    return torch.tensor(payload, dtype=target_dtype)
def test_get_data_lecun_idx_matrix(debug=False):
    """Check that ``rd.get_data`` yields a (60000, 28, 28) array for the
    ``train-images-idx3-ubyte.gz`` file.

    Args:
        debug: When truthy, print the first three decoded images in full.

    NOTE(review): the data directory is a relative path, so the test
    depends on the working directory it is launched from.
    """
    mnist_dir = os.path.join("..", "..", "data", "mnist")
    archive_name = "train-images-idx3-ubyte.gz"

    with tempfile.TemporaryDirectory() as scratch_dir:
        unpacked_path = gunzip_to_dir(archive_name, mnist_dir, scratch_dir)
        header = rd.get_metadata(unpacked_path)
        # get_data() is the call under test.
        images = rd.get_data(unpacked_path, header)

    if debug:
        # Lift numpy's summarisation threshold so every pixel is shown; the
        # first three images are expected to depict the digits 5, 0 and 4.
        with np.printoptions(threshold=np.inf, linewidth=150):
            print(images[0:3, :, :])

    assert images.shape == (60000, 28, 28)
def test_get_data_lecun_idx_vector(debug=False):
    """Check that ``rd.get_data`` yields a length-60000 vector for the
    ``train-labels-idx1-ubyte.gz`` file and that its first nine entries
    match the expected labels.

    Args:
        debug: When truthy, print the first three decoded entries.

    NOTE(review): the data directory is a relative path, so the test
    depends on the working directory it is launched from.
    """
    mnist_dir = os.path.join("..", "..", "data", "mnist")
    archive_name = "train-labels-idx1-ubyte.gz"

    with tempfile.TemporaryDirectory() as scratch_dir:
        unpacked_path = gunzip_to_dir(archive_name, mnist_dir, scratch_dir)
        header = rd.get_metadata(unpacked_path)
        # get_data() is the call under test.
        labels = rd.get_data(unpacked_path, header)

    if debug:
        print(labels[0:3])

    assert labels.shape == (60000, )

    # Stop-gap check: element-wise ``==`` is exact, so it would not be
    # appropriate for floating-point data or data containing NaN.
    expected_head = np.array([5, 0, 4, 1, 9, 2, 1, 3, 1])
    assert (labels[0:9] == expected_head).all()