def matfiles_to_lmdb(paths_src, path_dst, fieldname, lut=None):
    '''Write the integer-valued field `fieldname` of each .mat file in
    paths_src into a freshly opened LMDB at path_dst.

    lut -- optional callable applied to each array just before
    serialization (e.g. a label remapping).
    Returns 0 on completion.

    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer
    '''
    env = lmdb.open(path_dst, map_size=MAP_SZ)
    with env.begin(write=True) as txn:
        for i, src in enumerate(paths_src):
            arr = io.loadmat(src)[fieldname]
            # append a singleton axis via expand_dims(..., 3), matching the
            # other writers in this module.
            # NOTE(review): the original comment claimed the result is
            # (1,H,W); axis=3 actually appends at the end -- confirm against
            # the real input rank.
            arr = expand_dims(arr, 3).astype(int)
            if lut is not None:
                arr = lut(arr)
            datum = caffe.io.array_to_datum(arr)
            txn.put(IDX_FMT.format(i), datum.SerializeToString())
    env.close()
    return 0
def arrays_to_h5_fixed(arrs, key, path_dst):
    '''Save a list of same-sized arrays to an HDF5 file under one key.

    A trailing singleton axis is appended to every array before storage.
    The destination file is created/truncated ("w" mode).
    '''
    expanded = [expand_dims(a, 3) for a in arrs]
    with h5py.File(path_dst, "w") as h5:
        h5[key] = expanded
def arrays_to_lmdb(arrs, path_dst):
    '''Serialize a list of ndarrays into a freshly opened LMDB at path_dst.

    Each array gets a trailing singleton axis appended before being packed
    into a Caffe datum. Returns 0 on completion.
    '''
    env = lmdb.open(path_dst, map_size=MAP_SZ)
    with env.begin(write=True) as txn:
        for i, arr in enumerate(arrs):
            datum = caffe.io.array_to_datum(expand_dims(arr, 3))
            txn.put(IDX_FMT.format(i), datum.SerializeToString())
    env.close()
    return 0
def _infer_to_lmdb_cur_single_key(net, key_, n, db):
    '''Run network inference for n batches and store the outputs for the
    single blob `key_` into db (an open lmdb environment).

    Higher time complexity but lower space complexity than buffering all
    results in memory first; this variant takes advantage of there being
    only one key (one write transaction for the whole run).

    Returns a one-element list holding the number of entries written.
    '''
    count = 0
    with db.begin(write=True) as txn:
        for _ in range(n):
            # iterate the batch along its first axis, one sample at a time
            batch = forward(net, [key_])[key_].astype(float)
            for sample in batch:
                serialized = caffe.io.array_to_datum(
                    expand_dims(sample, 3)).SerializeToString()
                txn.put(IDX_FMT.format(count), serialized)
                count += 1
    return [count]
def scalars_to_lmdb(scalars, path_dst, lut=None):
    '''Store a sequence of scalar values in a freshly opened LMDB at path_dst.

    A bare (non-iterable) argument is accepted and treated as a length-1
    sequence. lut -- optional callable applied to each value before
    serialization. Raises AttributeError if any element is not scalar.
    Returns 0 on completion.
    '''
    env = lmdb.open(path_dst, map_size=MAP_SZ)
    with env.begin(write=True) as txn:
        if not hasattr(scalars, '__iter__'):
            scalars = np.array([scalars])
        for i, val in enumerate(scalars):
            if hasattr(val, '__iter__'):
                arr = np.array(val)
            else:
                arr = np.array([val])
            # every entry must hold exactly one value
            if arr.size != 1:
                raise AttributeError("Unexpected shape for scalar at i=%d (%s)"
                                     % (i, str(arr.shape)))
            # append a singleton axis before packing.
            # NOTE(review): the original comment claimed this guarantees
            # shape (1,1,1) -- confirm for the numpy version in use.
            arr = expand_dims(arr, 3).astype(int)
            if lut is not None:
                arr = lut(arr)
            txn.put(IDX_FMT.format(i),
                    caffe.io.array_to_datum(arr).SerializeToString())
    env.close()
    return 0
def _infer_to_lmdb_cur_multi_key(net, keys, n, dbs):
    '''Run network inference for n batches and append each key's outputs to
    the corresponding open lmdb environment in dbs (a mapping key -> env).

    Higher time complexity but lower space complexity than buffering all
    results in memory. See _infer_to_lmdb_cur_single_key() for the
    single-key variant.

    Returns the per-key entry counts, ordered like keys.
    '''
    counts = [0] * len(keys)
    for _ in range(n):
        outputs = forward(net, keys)
        for ki, key in enumerate(keys):
            # one write transaction per key per batch
            with dbs[key].begin(write=True) as txn:
                for sample in outputs[key].astype(float):
                    serialized = caffe.io.array_to_datum(
                        expand_dims(sample, 3)).SerializeToString()
                    txn.put(IDX_FMT.format(counts[ki]), serialized)
                    counts[ki] += 1
    return counts