def test_arr(self, mock_dat, mock_caffe):
    """Round-trip self.arr through tol.arrays_to_lmdb and verify the stored records."""
    # expected serialization of the test image
    expected = [
        '\x08\x03\x10\x04\x18\x02"\x18\x01\x04\x07\n\r\x10\x13\x16\x02\x05\x08\x0b\x0e\x11\x14\x17\x03\x06\t\x0c\x0f\x12\x15\x18(\x00',
        '\x08\x03\x10\x04\x18\x02"\x18\x02\x05\x08\x0b\x0e\x11\x14\x17\x03\x06\t\x0c\x0f\x12\x15\x18\x04\x07\n\r\x10\x13\x16\x19(\x00',
    ]
    # mock caffe calls made by our module
    mock_dat.return_value.SerializeToString = MagicMock(side_effect=expected)
    mock_caffe.io.array_to_datum.return_value = caffe.proto.caffe_pb2.Datum()
    # use the module and test it
    path_lmdb = os.path.join(self.dir_tmp, 'xarr2_lmdb')
    tol.arrays_to_lmdb(self.arr, path_lmdb)
    assert_true(os.path.isdir(path_lmdb), "failed to save LMDB")
    # read everything back and compare key/value pairs in order
    with lmdb.open(path_lmdb, readonly=True).begin() as txn:
        records = list(txn.cursor())
    for idx, (key, value) in enumerate(records):
        assert_equal(key, tol.IDX_FMT.format(idx), "Unexpected key.")
        assert_equal(value, expected[idx], "Unexpected content.")
    assert_equal(len(records), 2, "Unexpected number of samples.")
def setup_class(self):
    """Create a temp dir and populate an LMDB with a (4, 2, 3) test array."""
    self.dir_tmp = tempfile.mkdtemp()
    # values 1..24 in a (4, 2, 3) array — identical to the original literal
    data = np.arange(1, 25).reshape(4, 2, 3)
    tol.arrays_to_lmdb(list(data), os.path.join(self.dir_tmp, 'x_lmdb'))
def setup_class(self):
    """Create a temp dir and populate an LMDB with a (4, 2, 3) test array."""
    self.dir_tmp = tempfile.mkdtemp()
    # values 1..24 in a (4, 2, 3) array, with the first and last
    # entries zeroed — identical to the original literal
    data = np.arange(1, 25).reshape(4, 2, 3)
    data[0, 0, 0] = 0
    data[-1, -1, -1] = 0
    tol.arrays_to_lmdb(list(data), os.path.join(self.dir_tmp, 'x_lmdb'))
def infer_to_lmdb(net, keys, n, dst_prefix):
    """
    Run network inference for n batches and save results to an lmdb for
    each key.

    Lower time complexity but much higher space complexity (everything is
    accumulated in memory before writing). Not recommended for large
    datasets or a large number of keys.
    See infer_to_lmdb_cur() for a slower alternative with less memory
    overhead.

    Note: lmdb cannot preserve batches.

    Parameters:
    net -- network object handed to forward()
    keys -- names to extract from each forward() result dict
    n -- number of batches to run
    dst_prefix -- destination path template with one '%s' slot for the key

    Returns a list with the number of saved samples per key, in the order
    of keys.
    """
    dc = {k: [] for k in keys}
    for _ in range(n):
        d = forward(net, keys)
        for k in keys:
            # astype() already returns a new array, so the former
            # np.copy() wrapper around it was redundant
            dc[k].extend(d[k].astype(float))
    for k in keys:
        to_lmdb.arrays_to_lmdb(dc[k], dst_prefix % (k,))
    return [len(dc[k]) for k in keys]
def infer_to_lmdb(net, keys, n, dst_prefix):
    """
    Run network inference for n batches and save results to an lmdb for
    each key.

    Lower time complexity but much higher space complexity (everything is
    accumulated in memory before writing). Not recommended for large
    datasets or a large number of keys.
    See infer_to_lmdb_cur() for a slower alternative with less memory
    overhead.

    Note: lmdb cannot preserve batches.

    Parameters:
    net -- network object handed to forward()
    keys -- names to extract from each forward() result dict
    n -- number of batches to run
    dst_prefix -- destination path template with one '%s' slot for the key

    Returns a list with the number of saved samples per key, in the order
    of keys.
    """
    dc = {k: [] for k in keys}
    for _ in range(n):
        d = forward(net, keys)
        for k in keys:
            # astype() already returns a new array, so the former
            # np.copy() wrapper around it was redundant
            dc[k].extend(d[k].astype(float))
    for k in keys:
        to_lmdb.arrays_to_lmdb(dc[k], dst_prefix % (k,))
    return [len(dc[k]) for k in keys]
def nyudv2_to_lmdb(path_mat, dst_prefix, dir_dst, val_list=None):
    """
    Convert an NYU Depth v2 data file into train/val LMDBs.

    Writes one train and one val LMDB per data type (images, labels,
    depths) under dir_dst, named '<dst_prefix><type>_{train,val}_lmdb'.
    The train/val split is computed once (from the first data type) and
    reused for the others.

    Parameters:
    path_mat -- path to a .mat/.h5/.hdf5 file containing the dataset
    dst_prefix -- prefix prepended to each LMDB directory name
    dir_dst -- destination directory for the LMDBs
    val_list -- optional list forwarded to get_train_val_split_from_idx
        to select validation samples (default: empty list)

    Returns a list of (num_samples, lmdb_path) tuples, train then val
    for each data type.

    Raises IOError if path_mat is not a regular file or does not have a
    mat/h5/hdf5 extension.
    """
    val_list = val_list or []

    if not os.path.isfile(path_mat):
        raise IOError("Path is not a regular file (%s)" % path_mat)
    _, ext = os.path.splitext(path_mat)
    if ext not in ('.mat', '.h5', '.hdf5'):
        raise IOError("Invalid file type, expecting mat/h5/hdf5 file (%s)" %
                      path_mat)
    try:
        data = io.loadmat(path_mat)
    except (ValueError, NotImplementedError):
        # scipy.io.loadmat cannot read matfile version >= 7.3; those are
        # HDF5 files, so fall back to h5py (explicit read-only mode).
        data = h5py.File(path_mat, 'r')

    lmdb_info = []
    train_idx = None
    for typ in [NYUDV2DataType.IMAGES,
                NYUDV2DataType.LABELS,
                NYUDV2DataType.DEPTHS]:
        if typ == NYUDV2DataType.IMAGES:
            # np.float was a deprecated alias of builtin float
            dat = [mu.cwh_to_chw(x).astype(float) for x in data[typ]]
        elif typ == NYUDV2DataType.LABELS:
            dat = np.expand_dims(data[typ], axis=1).astype(int)
            dat = big_arr_to_arrs(dat)
        elif typ == NYUDV2DataType.DEPTHS:
            dat = np.expand_dims(data[typ], axis=1).astype(float)
            dat = big_arr_to_arrs(dat)
        else:
            raise ValueError("unknown NYUDV2DataType")

        # compute the split once and reuse it for every data type so the
        # three LMDB triples stay aligned sample-for-sample
        if train_idx is None:
            train_idx, val_idx = get_train_val_split_from_idx(
                len(dat), val_list)
            shuffle(train_idx)
            print(train_idx)

        print(typ, len(dat), dat[0].shape)

        fpath_lmdb = os.path.join(dir_dst,
                                  '%s%s_train_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in train_idx], fpath_lmdb)
        lmdb_info.append((len(train_idx), fpath_lmdb))

        fpath_lmdb = os.path.join(dir_dst,
                                  '%s%s_val_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in val_idx], fpath_lmdb)
        lmdb_info.append((len(val_idx), fpath_lmdb))

    return lmdb_info
def nyudv2_to_lmdb(path_mat, dst_prefix, dir_dst, val_list=None):
    """
    Convert an NYU Depth v2 data file into train/val LMDBs.

    Writes one train and one val LMDB per data type (images, labels,
    depths) under dir_dst, named '<dst_prefix><type>_{train,val}_lmdb'.
    The train/val split is computed once (from the first data type) and
    reused for the others.

    Parameters:
    path_mat -- path to a .mat/.h5/.hdf5 file containing the dataset
    dst_prefix -- prefix prepended to each LMDB directory name
    dir_dst -- destination directory for the LMDBs
    val_list -- optional list forwarded to get_train_val_split_from_idx
        to select validation samples (default: empty list)

    Returns a list of (num_samples, lmdb_path) tuples, train then val
    for each data type.

    Raises IOError if path_mat is not a regular file or does not have a
    mat/h5/hdf5 extension.
    """
    val_list = val_list or []

    if not os.path.isfile(path_mat):
        raise IOError("Path is not a regular file (%s)" % path_mat)
    _, ext = os.path.splitext(path_mat)
    if ext not in ('.mat', '.h5', '.hdf5'):
        raise IOError("Invalid file type, expecting mat/h5/hdf5 file (%s)" %
                      path_mat)
    try:
        data = io.loadmat(path_mat)
    except (ValueError, NotImplementedError):
        # scipy.io.loadmat cannot read matfile version >= 7.3; those are
        # HDF5 files, so fall back to h5py (explicit read-only mode).
        data = h5py.File(path_mat, 'r')

    lmdb_info = []
    train_idx = None
    for typ in [NYUDV2DataType.IMAGES,
                NYUDV2DataType.LABELS,
                NYUDV2DataType.DEPTHS]:
        if typ == NYUDV2DataType.IMAGES:
            # np.float was a deprecated alias of builtin float
            dat = [mu.cwh_to_chw(x).astype(float) for x in data[typ]]
        elif typ == NYUDV2DataType.LABELS:
            dat = np.expand_dims(data[typ], axis=1).astype(int)
            dat = big_arr_to_arrs(dat)
        elif typ == NYUDV2DataType.DEPTHS:
            dat = np.expand_dims(data[typ], axis=1).astype(float)
            dat = big_arr_to_arrs(dat)
        else:
            raise ValueError("unknown NYUDV2DataType")

        # compute the split once and reuse it for every data type so the
        # three LMDB triples stay aligned sample-for-sample
        if train_idx is None:
            train_idx, val_idx = get_train_val_split_from_idx(
                len(dat), val_list)
            shuffle(train_idx)
            print(train_idx)

        print(typ, len(dat), dat[0].shape)

        fpath_lmdb = os.path.join(dir_dst,
                                  '%s%s_train_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in train_idx], fpath_lmdb)
        lmdb_info.append((len(train_idx), fpath_lmdb))

        fpath_lmdb = os.path.join(dir_dst,
                                  '%s%s_val_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in val_idx], fpath_lmdb)
        lmdb_info.append((len(val_idx), fpath_lmdb))

    return lmdb_info