Example #1
0
def matfiles_to_lmdb(paths_src, path_dst, fieldname,
                     lut=None):
    '''
    Generate LMDB file from set of mat files with integer data
    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer

    Parameters
    ----------
    paths_src : iterable of str
        paths of the source .mat files, written in enumeration order
    path_dst : str
        destination path of the LMDB database
    fieldname : str
        name of the variable to load from each .mat file
    lut : callable or None
        optional transform applied to each array after the int cast
        (e.g. a label lookup table); skipped when None

    Returns
    -------
    int
        0 on success
    '''
    db = lmdb.open(path_dst, map_size=MAP_SZ)
    try:
        with db.begin(write=True) as in_txn:

            for idx, path_ in enumerate(paths_src):

                content_field = io.loadmat(path_)[fieldname]
                # get shape (1,H,W) -- `expand_dims` is a project helper;
                # NOTE(review): axis semantics assumed, confirm against its def
                content_field = expand_dims(content_field, 3)
                content_field = content_field.astype(int)

                if lut is not None:
                    content_field = lut(content_field)

                img_dat = caffe.io.array_to_datum(content_field)
                in_txn.put(IDX_FMT.format(idx), img_dat.SerializeToString())
    finally:
        # close the environment even when loadmat/serialization raises,
        # so the LMDB handle is not leaked on error
        db.close()

    return 0
Example #2
0
def arrays_to_h5_fixed(arrs, key, path_dst):
    '''
    save list of arrays (all same size) to hdf5 under a single key
    '''
    # expand each array first, then write the whole stack in one assignment
    expanded = [expand_dims(arr, 3) for arr in arrs]
    with h5py.File(path_dst, "w") as h5_file:
        h5_file[key] = expanded
            
Example #3
0
def arrays_to_lmdb(arrs, path_dst):
    '''
    Generate LMDB file from list of ndarrays

    Parameters
    ----------
    arrs : iterable of ndarray
        arrays to store, keyed by their enumeration index
    path_dst : str
        destination path of the LMDB database

    Returns
    -------
    int
        0 on success
    '''
    db = lmdb.open(path_dst, map_size=MAP_SZ)
    try:
        with db.begin(write=True) as in_txn:

            for idx, x in enumerate(arrs):
                content_field = expand_dims(x, 3)

                dat = caffe.io.array_to_datum(content_field)
                in_txn.put(IDX_FMT.format(idx), dat.SerializeToString())
    finally:
        # close the environment even when serialization raises,
        # so the LMDB handle is not leaked on error
        db.close()

    return 0
Example #4
0
def _infer_to_lmdb_cur_single_key(net, key_, n, db):
    '''
    Run network inference for n batches and save results to an lmdb for each key.
    Higher time complexity but lower space complexity.

    Takes advantage if there is only a single key
    '''
    count = 0

    with db.begin(write=True) as txn:
        for _batch in range(n):
            # one forward pass; iterate the batch outputs for this key
            outputs = forward(net, [key_])[key_].astype(float)

            for arr in outputs:
                datum = caffe.io.array_to_datum(expand_dims(arr, 3))
                txn.put(IDX_FMT.format(count), datum.SerializeToString())
                count += 1
    return [count]
Example #5
0
def scalars_to_lmdb(scalars, path_dst,
                    lut=None):
    '''
    Generate LMDB file from list of scalars

    Parameters
    ----------
    scalars : scalar or iterable of scalars
        a single scalar is wrapped into a one-element array
    path_dst : str
        destination path of the LMDB database
    lut : callable or None
        optional transform applied to each value after the int cast;
        skipped when None

    Returns
    -------
    int
        0 on success

    Raises
    ------
    AttributeError
        if an element is not actually a scalar (size != 1)
    '''
    db = lmdb.open(path_dst, map_size=MAP_SZ)
    try:
        with db.begin(write=True) as in_txn:

            if not hasattr(scalars, '__iter__'):
                scalars = np.array([scalars])

            for idx, x in enumerate(scalars):

                if not hasattr(x, '__iter__'):
                    content_field = np.array([x])
                else:
                    content_field = np.array(x)

                # validate these are scalars
                if content_field.size != 1:
                    raise AttributeError("Unexpected shape for scalar at i=%d (%s)"
                                         % (idx, str(content_field.shape)))

                # guarantee shape (1,1,1)
                content_field = expand_dims(content_field, 3)
                content_field = content_field.astype(int)

                if lut is not None:
                    content_field = lut(content_field)

                dat = caffe.io.array_to_datum(content_field)
                in_txn.put(IDX_FMT.format(idx), dat.SerializeToString())
    finally:
        # close the environment even when validation raises above,
        # so the LMDB handle is not leaked on error
        db.close()

    return 0
Example #6
0
def _infer_to_lmdb_cur_multi_key(net, keys, n, dbs):
    '''
    Run network inference for n batches and save results to an lmdb for each key.
    Higher time complexity but lower space complexity.

    See _infer_to_lmdb_cur_single_key() if there is only a single key
    '''
    counts = [0] * len(keys)

    for _batch in range(n):
        batch_out = forward(net, keys)

        for pos, key in enumerate(keys):
            # one write transaction per key per batch
            with dbs[key].begin(write=True) as txn:

                for arr in batch_out[key].astype(float):
                    datum = caffe.io.array_to_datum(expand_dims(arr, 3))
                    txn.put(IDX_FMT.format(counts[pos]),
                            datum.SerializeToString())

                    counts[pos] += 1
    return counts