Example #1
def copy_samples_lmdb(path_lmdb, path_dst, keys, func_data=None):
    """
    Copy select samples from an lmdb into another.
    Can be used for sampling from an lmdb into another and generating a random shuffle
    of lmdb content.
    
    Parameters:
    path_lmdb -- source lmdb
    path_dst -- destination lmdb
    keys -- list of keys or indices to sample from source lmdb
    """
    db = lmdb.open(path_dst, map_size=MAP_SZ)
    key_dst = 0
    with db.begin(write=True) as txn_dst:
        with lmdb.open(path_lmdb, readonly=True).begin() as txn_src:

            for key_src in keys:
                # integer indices are formatted into string keys
                # (basestring is Python 2; use str under Python 3)
                if not isinstance(key_src, basestring):
                    key_src = IDX_FMT.format(key_src)
                if func_data is None:
                    txn_dst.put(IDX_FMT.format(key_dst), txn_src.get(key_src))
                else:
                    txn_dst.put(IDX_FMT.format(key_dst),
                                func_data(txn_src.get(key_src)))
                key_dst += 1
    db.close()
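A minimal usage sketch: generating a randomly shuffled copy of an lmdb by passing the indices in random order. The paths and sample count are hypothetical, and the module-level constants (MAP_SZ, IDX_FMT) used above are assumed to be in scope.

import random

n_samples = 1000  # hypothetical number of samples in the source lmdb
keys = list(range(n_samples))
random.shuffle(keys)
copy_samples_lmdb('/path/to/src_lmdb', '/path/to/shuffled_lmdb', keys)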
Example #2
def _infer_to_lmdb_cur_multi_key(net, keys, n, dbs):
    '''
    Run network inference for n batches and save the results for each key
    to its own lmdb.
    Higher time complexity but lower space complexity: each batch is written
    to disk immediately instead of being accumulated in memory.

    See _infer_to_lmdb_cur_single_key() if there is only a single key.
    '''
    idxs = [0] * len(keys)

    for _ in range(n):
        d = forward(net, keys)
        for ik, k in enumerate(keys):

            with dbs[k].begin(write=True) as txn:

                # split the batch blob into per-sample arrays
                batch = list(d[k].astype(float))

                for x in batch:
                    # pad to 3 dims, as caffe.io.array_to_datum expects
                    x = expand_dims(x, 3)
                    txn.put(IDX_FMT.format(idxs[ik]),
                            caffe.io.array_to_datum(x).SerializeToString())
                    idxs[ik] += 1
    return idxs
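A sketch of how the dbs argument might be prepared: one open lmdb environment per output key. The key names and paths are hypothetical, MAP_SZ is the module constant used above, and net is assumed to be a loaded caffe.Net consumed by the module's forward() helper.

import os
import lmdb

keys = ['prob', 'feat']  # hypothetical network output blobs
dbs = {k: lmdb.open(os.path.join('/path/to/out', k + '_lmdb'), map_size=MAP_SZ)
       for k in keys}
idxs = _infer_to_lmdb_cur_multi_key(net, keys, 10, dbs)
for db in dbs.values():
    db.close()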
Example #3
def matfiles_to_lmdb(paths_src, path_dst, fieldname, lut=None):
    '''
    Generate an LMDB file from a set of .mat files containing integer data.
    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer
    '''
    db = lmdb.open(path_dst, map_size=MAP_SZ)

    with db.begin(write=True) as in_txn:

        for idx, path_ in enumerate(paths_src):

            content_field = io.loadmat(path_)[fieldname]
            # get shape (1,H,W)
            content_field = expand_dims(content_field, 3)
            content_field = content_field.astype(int)

            if lut is not None:
                content_field = lut(content_field)

            img_dat = caffe.io.array_to_datum(content_field)
            in_txn.put(IDX_FMT.format(idx), img_dat.SerializeToString())

    db.close()

    return 0
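A usage sketch: a LUT can be passed to remap label values on the fly. The file list, field name, and mapping below are hypothetical.

paths = ['/path/to/sample_%02d.mat' % i for i in range(10)]
lut = lambda arr: arr - 1  # hypothetical: shift 1-based MATLAB labels to 0-based
matfiles_to_lmdb(paths, '/path/to/labels_lmdb', 'label_map', lut=lut)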
Example #4
def read_values_at(path_lmdb, key, dtype=None):
    """
    Read key from lmdb
    adapted from Gustav Larsson http://deepdish.io/2015/04/28/creating-lmdb-in-python/
    """
    with lmdb.open(path_lmdb, readonly=True).begin() as txn:

        # integer indices are formatted into string keys
        if not isinstance(key, basestring):
            key = IDX_FMT.format(key)
        dat, x = unpack_raw_datum(txn.get(key), dtype)
        return x, dat.label  # scalar label
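A usage sketch reading back the sample stored at index 0; the path and dtype are hypothetical, and unpack_raw_datum is assumed to be the module helper that deserializes a Datum into an array.

import numpy as np

x, label = read_values_at('/path/to/lmdb', 0, dtype=np.float32)
print(x.shape, label)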
Example #5
def arrays_to_lmdb(arrs, path_dst):
    '''
    Generate an LMDB file from a list of ndarrays.
    '''
    db = lmdb.open(path_dst, map_size=MAP_SZ)

    with db.begin(write=True) as in_txn:

        for idx, x in enumerate(arrs):
            content_field = expand_dims(x, 3)

            dat = caffe.io.array_to_datum(content_field)
            in_txn.put(IDX_FMT.format(idx), dat.SerializeToString())

    db.close()
    return 0
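A usage sketch with randomly generated 2D arrays; the shapes, count, and path are hypothetical.

import numpy as np

arrs = [np.random.rand(32, 32) for _ in range(100)]
arrays_to_lmdb(arrs, '/path/to/arrays_lmdb')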
Example #6
def imgs_to_lmdb(paths_src, path_dst):
    '''
    Generate an LMDB file from a set of images.
    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer
    '''

    db = lmdb.open(path_dst, map_size=MAP_SZ)

    with db.begin(write=True) as in_txn:

        for idx, path_ in enumerate(paths_src):
            img = read_img_cv2(path_)
            img_dat = caffe.io.array_to_datum(img)
            in_txn.put(IDX_FMT.format(idx), img_dat.SerializeToString())

    db.close()

    return 0
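A usage sketch; the image directory is hypothetical, and read_img_cv2 is assumed to be the module helper that loads an image into the layout array_to_datum expects.

from glob import glob

paths = sorted(glob('/path/to/images/*.png'))
imgs_to_lmdb(paths, '/path/to/imgs_lmdb')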
Example #7
def _infer_to_lmdb_cur_single_key(net, key_, n, db):
    '''
    Run network inference for n batches and save the results to an lmdb.
    Higher time complexity but lower space complexity: each batch is written
    to disk immediately instead of being accumulated in memory.

    Takes advantage of there being only a single key: the write transaction
    is opened once rather than once per batch.
    '''
    idx = 0

    with db.begin(write=True) as txn:
        for _ in range(n):
            d = forward(net, [key_])
            # split the batch blob into per-sample arrays
            batch = list(d[key_].astype(float))

            for x in batch:
                # pad to 3 dims, as caffe.io.array_to_datum expects
                x = expand_dims(x, 3)
                txn.put(IDX_FMT.format(idx),
                        caffe.io.array_to_datum(x).SerializeToString())
                idx += 1
    return [idx]
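A usage sketch; the blob name, batch count, and path are hypothetical, and net is assumed to be a loaded caffe.Net consumed by the module's forward() helper.

import lmdb

db = lmdb.open('/path/to/prob_lmdb', map_size=MAP_SZ)
counts = _infer_to_lmdb_cur_single_key(net, 'prob', 10, db)  # returns [n_written]
db.close()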
Example #8
def concatenate_lmdb(paths_lmdb, path_dst):
    """
    Copy select samples from an lmdb into another.
    Can be used for sampling from an lmdb into another and generating a random shuffle
    of lmdb content.
    
    Parameters:
    paths_lmdb -- list of lmdbs to conatenate
    path_dst -- destination lmdb
    keys -- list of keys or indices to sample from source lmdb
    """
    db = lmdb.open(path_dst, map_size=MAP_SZ)
    key_dst = 0
    with db.begin(write=True) as txn_dst:
        for p in paths_lmdb:
            with lmdb.open(p, readonly=True).begin() as txn_src:
                for _, value in txn_src.cursor():
                    txn_dst.put(IDX_FMT.format(key_dst), value)
                    key_dst += 1
    db.close()
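A usage sketch merging two hypothetical lmdbs into a third:

concatenate_lmdb(['/path/to/train_part1_lmdb', '/path/to/train_part2_lmdb'],
                 '/path/to/train_full_lmdb')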
Example #9
def scalars_to_lmdb(scalars, path_dst, lut=None):
    '''
    Generate an LMDB file from a list of scalars.
    '''
    db = lmdb.open(path_dst, map_size=MAP_SZ)

    with db.begin(write=True) as in_txn:

        if not hasattr(scalars, '__iter__'):
            scalars = np.array([scalars])

        for idx, x in enumerate(scalars):

            if not hasattr(x, '__iter__'):
                content_field = np.array([x])
            else:
                content_field = np.array(x)

            # validate these are scalars
            if content_field.size != 1:
                raise AttributeError(
                    "Unexpected shape for scalar at i=%d (%s)" %
                    (idx, str(content_field.shape)))

            # guarantee shape (1,1,1)
            content_field = expand_dims(content_field, 3)
            content_field = content_field.astype(int)

            if lut is not None:
                content_field = lut(content_field)

            dat = caffe.io.array_to_datum(content_field)
            in_txn.put(IDX_FMT.format(idx), dat.SerializeToString())

    db.close()

    return 0
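A usage sketch writing integer class labels; the values and path are hypothetical.

labels = [0, 1, 1, 0, 2]
scalars_to_lmdb(labels, '/path/to/class_labels_lmdb')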