Example #1
0
    def test_arr(self, mock_dat, mock_caffe):
        """arrays_to_lmdb stores one serialized datum per array, keyed by index."""

        # serialization expected for each of the two test arrays
        expected = [
            '\x08\x03\x10\x04\x18\x02"\x18\x01\x04\x07\n\r\x10\x13\x16\x02\x05\x08\x0b\x0e\x11\x14\x17\x03\x06\t\x0c\x0f\x12\x15\x18(\x00',
            '\x08\x03\x10\x04\x18\x02"\x18\x02\x05\x08\x0b\x0e\x11\x14\x17\x03\x06\t\x0c\x0f\x12\x15\x18\x04\x07\n\r\x10\x13\x16\x19(\x00',
        ]

        # stub out the caffe calls made inside the module under test
        mock_dat.return_value.SerializeToString = MagicMock(side_effect=expected)
        mock_caffe.io.array_to_datum.return_value = \
            caffe.proto.caffe_pb2.Datum()

        # exercise the module
        path_lmdb = os.path.join(self.dir_tmp, 'xarr2_lmdb')
        tol.arrays_to_lmdb(self.arr, path_lmdb)
        assert_true(os.path.isdir(path_lmdb), "failed to save LMDB")

        # walk the resulting LMDB and verify keys and serialized content
        num_seen = 0
        with lmdb.open(path_lmdb, readonly=True).begin() as txn:
            for idx, (key, value) in enumerate(txn.cursor()):
                assert_equal(key, tol.IDX_FMT.format(idx), "Unexpected key.")
                assert_equal(value, expected[idx], "Unexpected content.")
                num_seen = idx + 1
        assert_equal(num_seen, 2, "Unexpected number of samples.")
Example #2
0
    def setup_class(self):
        """Create a temp dir and populate an LMDB with a fixed 4x2x3 int array."""
        self.dir_tmp = tempfile.mkdtemp()

        # same data as the original literal: the integers 1..24 in a 4x2x3 grid
        arr = np.arange(1, 25).reshape(4, 2, 3)
        tol.arrays_to_lmdb(list(arr),
                           os.path.join(self.dir_tmp, 'x_lmdb'))
Example #3
0
    def setup_class(self):
        """Create a temp dir and populate an LMDB with a fixed 4x2x3 int array.

        The first and last elements are 0 (instead of 1 and 24) so the
        fixture exercises zero-valued entries at both boundaries.
        """
        self.dir_tmp = tempfile.mkdtemp()

        x = np.array([
            [[0, 2, 3], [4, 5, 6]],
            [[7, 8, 9], [10, 11, 12]],
            [[13, 14, 15], [16, 17, 18]],
            [[19, 20, 21], [22, 23, 0]],
        ])

        tol.arrays_to_lmdb(list(x), os.path.join(self.dir_tmp, 'x_lmdb'))
Example #4
0
def infer_to_lmdb(net, keys, n, dst_prefix):
    """
    Run network inference for n batches and save results to an lmdb for each key.
    Lower time complexity but much higher space complexity.

    Not recommended for large datasets or large number of keys
    See: infer_to_lmdb_cur() for slower alternative with less memory overhead

    lmdb cannot preserve batches
    """
    # accumulate every batch in memory, one list per requested key
    collected = {key: [] for key in keys}
    for _ in range(n):
        batch = forward(net, keys)
        for key in keys:
            # copy so the buffers survive the network reusing its blobs
            collected[key].extend(np.copy(batch[key].astype(float)))

    # one LMDB per key, path derived from the prefix format string
    for key in keys:
        to_lmdb.arrays_to_lmdb(collected[key], dst_prefix % (key,))

    return [len(collected[key]) for key in keys]
Example #5
0
def infer_to_lmdb(net, keys, n, dst_prefix):
    """
    Run network inference for n batches and save results to an lmdb for each key.
    Lower time complexity but much higher space complexity.

    Not recommended for large datasets or large number of keys
    See: infer_to_lmdb_cur() for slower alternative with less memory overhead

    lmdb cannot preserve batches
    """
    # per-key buffers holding every inferred sample in memory
    buffers = {}
    for k in keys:
        buffers[k] = []

    for _ in range(n):
        outputs = forward(net, keys)
        for k in keys:
            # copied so later forward passes cannot overwrite stored data
            buffers[k].extend(np.copy(outputs[k].astype(float)))

    # write one LMDB per key and report how many samples each received
    counts = []
    for k in keys:
        to_lmdb.arrays_to_lmdb(buffers[k], dst_prefix % (k,))
        counts.append(len(buffers[k]))
    return counts
Example #6
0
def nyudv2_to_lmdb(path_mat, dst_prefix, dir_dst, val_list=None):
    """
    Convert an NYU Depth V2 mat/HDF5 file into train/val LMDBs.

    One train LMDB and one val LMDB is written per data type
    (images, labels, depths), all sharing a single train/val split.

    Parameters:
        path_mat: path to a .mat/.h5/.hdf5 file keyed by NYUDV2DataType values
        dst_prefix: prefix prepended to each generated LMDB directory name
        dir_dst: destination directory for the LMDBs
        val_list: optional list used by get_train_val_split_from_idx to pick
            validation samples (defaults to empty -> no validation samples)

    Returns:
        list of (sample_count, lmdb_path) tuples, train then val, per type.

    Raises:
        IOError: path_mat is not a regular file or has the wrong extension.
        ValueError: an unknown NYUDV2DataType is encountered.
    """
    val_list = val_list or []
    if not os.path.isfile(path_mat):
        raise IOError("Path is not a regular file (%s)" % path_mat)

    _, ext = os.path.splitext(path_mat)
    if ext not in ('.mat', '.h5', '.hdf5'):
        raise IOError("Invalid file type, expecting mat/h5/hdf5 file (%s)" %
                      path_mat)

    try:
        data = io.loadmat(path_mat)
    except (ValueError, NotImplementedError):
        # scipy cannot read matfile version >= 7.3; those are HDF5 files.
        data = h5py.File(path_mat, 'r')

    lmdb_info = []
    train_idx = None

    for typ in [
            NYUDV2DataType.IMAGES, NYUDV2DataType.LABELS, NYUDV2DataType.DEPTHS
    ]:

        if typ == NYUDV2DataType.IMAGES:
            # np.float was a deprecated alias for builtin float (NumPy >= 1.20)
            dat = [mu.cwh_to_chw(x).astype(float) for x in data[typ]]

        elif typ == NYUDV2DataType.LABELS:

            dat = np.expand_dims(data[typ], axis=1).astype(int)
            dat = big_arr_to_arrs(dat)

        elif typ == NYUDV2DataType.DEPTHS:

            dat = np.expand_dims(data[typ], axis=1).astype(float)
            dat = big_arr_to_arrs(dat)

        else:
            raise ValueError("unknown NYUDV2DataType")

        # split once, on the first type, so all types share the same split
        if train_idx is None:
            train_idx, val_idx = get_train_val_split_from_idx(
                len(dat), val_list)
            shuffle(train_idx)
            print(train_idx)

        # TODO: validate that images/labels/depths have matching lengths.

        # was a Python-2-only print statement: SyntaxError under Python 3
        print(typ, len(dat), dat[0].shape)

        fpath_lmdb = os.path.join(dir_dst,
                                  '%s%s_train_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in train_idx], fpath_lmdb)

        lmdb_info.append((len(train_idx), fpath_lmdb))

        fpath_lmdb = os.path.join(dir_dst, '%s%s_val_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in val_idx], fpath_lmdb)

        lmdb_info.append((len(val_idx), fpath_lmdb))

    return lmdb_info
Example #7
0
def nyudv2_to_lmdb(path_mat,
                   dst_prefix,
                   dir_dst,
                   val_list=None):
    """
    Convert an NYU Depth V2 mat/HDF5 file into train/val LMDBs.

    One train LMDB and one val LMDB is written per data type
    (images, labels, depths), all sharing a single train/val split.

    Parameters:
        path_mat: path to a .mat/.h5/.hdf5 file keyed by NYUDV2DataType values
        dst_prefix: prefix prepended to each generated LMDB directory name
        dir_dst: destination directory for the LMDBs
        val_list: optional list used by get_train_val_split_from_idx to pick
            validation samples (defaults to empty -> no validation samples)

    Returns:
        list of (sample_count, lmdb_path) tuples, train then val, per type.

    Raises:
        IOError: path_mat is not a regular file or has the wrong extension.
        ValueError: an unknown NYUDV2DataType is encountered.
    """
    val_list = val_list or []
    if not os.path.isfile(path_mat):
        raise IOError("Path is not a regular file (%s)" % path_mat)

    _, ext = os.path.splitext(path_mat)

    if ext not in ('.mat', '.h5', '.hdf5'):
        raise IOError("Invalid file type, expecting mat/h5/hdf5 file (%s)" % path_mat)

    try:
        data = io.loadmat(path_mat)
    except (ValueError, NotImplementedError):
        # scipy cannot read matfile version >= 7.3; those are HDF5 files.
        data = h5py.File(path_mat, 'r')

    lmdb_info = []
    train_idx = None

    for typ in [NYUDV2DataType.IMAGES,
                NYUDV2DataType.LABELS,
                NYUDV2DataType.DEPTHS]:

        if typ == NYUDV2DataType.IMAGES:
            # np.float was a deprecated alias for builtin float (NumPy >= 1.20)
            dat = [mu.cwh_to_chw(x).astype(float) for x in data[typ]]

        elif typ == NYUDV2DataType.LABELS:

            dat = np.expand_dims(data[typ], axis=1).astype(int)
            dat = big_arr_to_arrs(dat)

        elif typ == NYUDV2DataType.DEPTHS:

            dat = np.expand_dims(data[typ], axis=1).astype(float)
            dat = big_arr_to_arrs(dat)

        else:
            raise ValueError("unknown NYUDV2DataType")

        # split once, on the first type, so all types share the same split
        if train_idx is None:
            train_idx, val_idx = get_train_val_split_from_idx(len(dat), val_list)
            shuffle(train_idx)
            print(train_idx)

        # TODO: validate that images/labels/depths have matching lengths.

        # was a Python-2-only print statement: SyntaxError under Python 3
        print(typ, len(dat), dat[0].shape)

        fpath_lmdb = os.path.join(dir_dst, '%s%s_train_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in train_idx], fpath_lmdb)

        lmdb_info.append((len(train_idx), fpath_lmdb))

        fpath_lmdb = os.path.join(dir_dst, '%s%s_val_lmdb' % (dst_prefix, typ))
        to_lmdb.arrays_to_lmdb([dat[i] for i in val_idx], fpath_lmdb)

        lmdb_info.append((len(val_idx), fpath_lmdb))

    return lmdb_info