def test_magic_number_parsing(file_name, file_path, true_metadata):
    """Verify that ``rd.get_metadata`` returns the expected metadata.

    ``gunzip_to_dir`` is called with the file name, its directory and a
    temporary scratch directory; the path it returns is handed to
    ``rd.get_metadata`` and the result is compared with ``true_metadata``.
    The scratch directory is removed when the ``with`` block exits.

    Args:
        file_name: Name of the input file passed to ``gunzip_to_dir``.
        file_path: Directory of the input file passed to ``gunzip_to_dir``.
        true_metadata: Value the parsed metadata must equal.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        parsed = rd.get_metadata(
            gunzip_to_dir(file_name, file_path, scratch_dir)
        )
        assert parsed == true_metadata
def __init__(self, path_to_image_store, path_to_labels, transform=None):
    """Load image and label data and store them as tensors.

    Both files are read through ``rd.get_metadata`` / ``rd.get_data``.
    Images are stored as a float tensor with an extra size-1 dimension
    inserted at index 1, labels as a long tensor.

    Args:
        path_to_image_store: Path of the image file to read.
        path_to_labels: Path of the label file to read.
        transform: Optional object kept on ``self.transform``; it is only
            stored here, not applied.

    Raises:
        MismatchedDataError: If the first dimension of the image tensor
            differs from the first dimension of the label tensor.
    """
    # Read both headers first, then the payloads they describe.
    image_meta = rd.get_metadata(path_to_image_store)
    label_meta = rd.get_metadata(path_to_labels)
    raw_images = rd.get_data(path_to_image_store, image_meta)
    raw_labels = rd.get_data(path_to_labels, label_meta)

    # Float images get a singleton dimension at index 1
    # (presumably the channel axis -- confirm against the model's input).
    self.image_data = torch.tensor(raw_images, dtype=torch.float).unsqueeze(1)
    self.label_data = torch.tensor(raw_labels, dtype=torch.long)

    # Every image needs exactly one label.
    image_count = self.image_data.size(0)
    label_count = self.label_data.size(0)
    if image_count != label_count:
        raise MismatchedDataError(
            "len(Images) != len(labels). Check loaded data!")

    self.transform = transform
def process_idx_file(ip_file_name, ip_file_path, is_label=False):
    """Read a file through ``rd`` and return its contents as a tensor.

    The input is passed through ``gunzip_to_dir`` into a temporary
    directory, its metadata and data are read with ``rd.get_metadata`` and
    ``rd.get_data``, and the data is converted with ``torch.tensor``.

    Args:
        ip_file_name: Name of the input file passed to ``gunzip_to_dir``.
        ip_file_path: Directory of the input file passed to
            ``gunzip_to_dir``.
        is_label: When true the tensor dtype is ``torch.long``; otherwise
            it is ``torch.float``.

    Returns:
        A ``torch.Tensor`` holding the file's data.
    """
    target_dtype = torch.long if is_label else torch.float
    with tempfile.TemporaryDirectory() as scratch_dir:
        unpacked_path = gunzip_to_dir(ip_file_name, ip_file_path, scratch_dir)
        header = rd.get_metadata(unpacked_path)
        raw = rd.get_data(unpacked_path, header)
        # torch.tensor copies the data, so the result does not depend on
        # the scratch directory once it has been cleaned up.
        return torch.tensor(raw, dtype=target_dtype)
def test_get_data_lecun_idx_matrix(debug=False):
    """Check the shape ``rd.get_data`` returns for the MNIST training images.

    Args:
        debug: When true, print the first three images in full (no
            truncation, 150-character lines) before asserting.
    """
    archive_name = "train-images-idx3-ubyte.gz"
    archive_dir = os.path.join("..", "..", "data", "mnist")

    with tempfile.TemporaryDirectory() as scratch_dir:
        unpacked_path = gunzip_to_dir(archive_name, archive_dir, scratch_dir)
        header = rd.get_metadata(unpacked_path)

        # Function under test.
        images = rd.get_data(unpacked_path, header)

        # Per the original note, the first three images are (5, 0, 4).
        if debug:
            with np.printoptions(threshold=np.inf, linewidth=150):
                print(images[:3, :, :])

        assert images.shape == (60000, 28, 28)
def test_get_data_lecun_idx_vector(debug=False):
    """Check ``rd.get_data`` on the MNIST training labels file.

    Asserts that the returned array has shape ``(60000,)`` and that its
    first nine entries equal the expected label values.

    Args:
        debug: When true, print the first three entries before asserting.
    """
    file_name = "train-labels-idx1-ubyte.gz"
    file_path = os.path.join("..", "..", "data", "mnist")
    expected_head = np.array([5, 0, 4, 1, 9, 2, 1, 3, 1])

    with tempfile.TemporaryDirectory() as dirpath:
        file_to_read_full_path = gunzip_to_dir(file_name, file_path, dirpath)
        metadata = rd.get_metadata(file_to_read_full_path)

        # Run get_data()
        idx_data = rd.get_data(file_to_read_full_path, metadata)

        if debug:
            print(idx_data[0:3])

        assert idx_data.shape == (60000,)

        # assert_array_equal replaces the former `(a == b).all()` stop-gap:
        # it also verifies the shapes match, treats NaNs in the same
        # position as equal, and reports which elements differ on failure.
        np.testing.assert_array_equal(idx_data[0:9], expected_head)