# Example #4
def parse_biom_table_hdf5(h5grp, order="observation"):
    """Parse an HDF5 formatted BIOM table

    The expected structure of this group is below. A few basic definitions,
    N is the number of observations and M is the number of samples. Data are
    stored in both compressed sparse row (for observation oriented operations)
    and compressed sparse column (for sample oriented operations).

    ./id                     : str, an arbitrary ID
    ./type                   : str, the table type (e.g, OTU table)
    ./format-url             : str, a URL that describes the format
    ./format-version         : two element tuple of int32, major and minor
    ./generated-by           : str, what generated this file
    ./creation-date          : str, ISO format
    ./shape                  : two element tuple of int32, N by M
    ./nnz                    : int32 or int64, number of non zero elements
    ./observation            : Group
    ./observation/ids        : (N,) dataset of str or vlen str
    ./observation/data       : (N,) dataset of float64
    ./observation/indices    : (N,) dataset of int32
    ./observation/indptr     : (M+1,) dataset of int32
    [./observation/metadata] : Optional, JSON str, in index order with ids
    ./sample                 : Group
    ./sample/ids             : (M,) dataset of str or vlen str
    ./sample/data            : (M,) dataset of float64
    ./sample/indices         : (M,) dataset of int32
    ./sample/indptr          : (N+1,) dataset of int32
    [./sample/metadata]      : Optional, JSON str, in index order with ids

    NOTE(review): the dataset shapes listed above are reproduced as found;
    confirm them against the BIOM format specification.

    Parameters
    ----------
    h5grp : a h5py ``Group`` or an open h5py ``File``
    order : 'observation' or 'sample' to indicate which data ordering to load
        the table as

    Returns
    -------
    Table
        A BIOM ``Table`` object

    Raises
    ------
    ValueError
        If ``order`` is neither 'observation' nor 'sample'.
    """
    if order not in ("observation", "sample"):
        raise ValueError("Unknown order %s!" % order)

    # fetch all of the IDs
    obs_ids = h5grp["observation/ids"][:]
    samp_ids = h5grp["sample/ids"][:]

    # fetch all of the metadata; a missing "metadata" member falls back to a
    # one-element array holding an empty JSON list
    no_md = np.array(["[]"])
    obs_md = json.loads(h5grp["observation"].get("metadata", no_md)[0])
    samp_md = json.loads(h5grp["sample"].get("metadata", no_md)[0])

    # construct the sparse representation
    rep = ScipySparseMat(len(obs_ids), len(samp_ids))

    # load the data
    # HDF5 member paths are always "/"-separated, so build them explicitly
    # rather than with os.path.join (which uses "\\" on Windows).
    data = h5grp["%s/data" % order]
    indices = h5grp["%s/indices" % order]
    indptr = h5grp["%s/indptr" % order]
    cs = (data, indices, indptr)

    # sample ordering is stored column-compressed, observation ordering
    # row-compressed
    rep._matrix = csc_matrix(cs) if order == "sample" else csr_matrix(cs)

    # empty metadata lists are passed on as None
    return table_factory(rep, samp_ids, obs_ids, samp_md or None, obs_md or None)
# Example #5
    def test_to_scipy(self):
        """Convert to expected scipy types

        Exercises ``to_scipy`` with a single dict, a dict with
        ``transpose=True``, a list of dicts with ``transpose=True``, a list
        of ``ScipySparseMat`` rows, and an empty list. Matrices are compared
        through their sorted ``items()``.
        """
        vals = {(0,0):5,(0,1):6,(1,0):7,(1,1):8}
        obs = to_scipy(vals)
        exp = ScipySparseMat(2,2)
        exp[(0,0)] = 5
        exp[(0,1)] = 6
        exp[(1,0)] = 7
        exp[(1,1)] = 8
        # This case previously built obs/exp but never compared them.
        self.assertEqual(sorted(obs.items()), sorted(exp.items()))

        # `vals` rather than `input`, to avoid shadowing the builtin
        vals = {(0,1):5,(10,8):-1.23}

        exp = ScipySparseMat(11,9)
        exp[(0,1)] = 5
        exp[(10,8)] = -1.23
        obs = to_scipy(vals)
        self.assertEqual(sorted(obs.items()), sorted(exp.items()))

        # test transpose
        exp = ScipySparseMat(9,11)
        exp[(1,0)] = 5
        exp[(8,10)] = -1.23
        obs = to_scipy(vals, transpose=True)
        self.assertEqual(sorted(obs.items()), sorted(exp.items()))

        # passing a list of dicts, transpose
        exp = ScipySparseMat(3,2)
        exp[(0,0)] = 5.0
        exp[(1,0)] = 6.0
        exp[(2,0)] = 7.0
        exp[(0,1)] = 8.0
        exp[(1,1)] = 9.0
        exp[(2,1)] = 10.0
        # NOTE(review): the second dict and transpose=True were missing from
        # the truncated call; they are reconstructed from the expected 3x2
        # matrix above -- confirm against the original test.
        obs = to_scipy([{(0,0):5,(0,1):6,(0,2):7},
                        {(1,0):8,(1,1):9,(1,2):10}], transpose=True)
        self.assertEqual(sorted(obs.items()), sorted(exp.items()))

        # passing a list of ScipySparseMats
        exp = ScipySparseMat(2,3)
        exp[(0,0)] = 5
        exp[(0,1)] = 6
        exp[(0,2)] = 7
        exp[(1,0)] = 8
        exp[(1,1)] = 9
        exp[(1,2)] = 10
        row1 = ScipySparseMat(1,3)
        row1[(0,0)] = 5
        row1[(0,1)] = 6
        row1[(0,2)] = 7
        row2 = ScipySparseMat(1,3)
        row2[(0,0)] = 8
        row2[(0,1)] = 9
        row2[(0,2)] = 10
        obs = to_scipy([row1, row2])
        self.assertEqual(sorted(obs.items()), sorted(exp.items()))

        # test empty set
        exp = ScipySparseMat(0,0)
        obs = to_scipy([])
        self.assertEqual(sorted(obs.items()), sorted(exp.items()))
# Example #6
class ScipySparseMatTests(TestCase):
    """Unit tests for the ``ScipySparseMat`` sparse matrix wrapper.

    NOTE(review): several assertions compare numpy arrays with
    ``assertEqual``; this presumably relies on the ``TestCase`` in use
    supporting array comparison -- confirm which ``TestCase`` is imported.
    """

    def setUp(self):
        """Build the matrix fixtures shared by the tests."""
        # 1 0 2
        # 3 0 4
        self.mat1 = ScipySparseMat(2,3,data=array([[1,0,2],[3,0,4]]))

        # Empty/null cases (i.e., 0x0, 0xn, nx0).
        self.null1 = ScipySparseMat(0,0)
        self.null2 = ScipySparseMat(0,42)
        self.null3 = ScipySparseMat(42,0)
        self.nulls = [self.null1, self.null2, self.null3]

        # 0 0
        # 0 0
        self.empty = ScipySparseMat(2,2)

        # 1 0 3
        self.row_vec = ScipySparseMat(1,3)
        self.row_vec[0,0] = 1
        self.row_vec[0,2] = 3

        # 1
        # 0
        # 3
        self.col_vec = ScipySparseMat(3,1)
        self.col_vec[0,0] = 1
        self.col_vec[2,0] = 3

        # 1x1
        self.single_ele = ScipySparseMat(1,1)
        self.single_ele[0,0] = 42

        # Explicit zeros.
        # NOTE(review): the constructor call was truncated in this copy. The
        # (values, (rows, cols)) data below is a reconstruction that stores
        # an explicit zero at (1,1) next to four nonzero values, matching
        # the size of 4 expected in test_size -- confirm against the
        # original fixture and the data forms ScipySparseMat accepts.
        self.explicit_zeros = ScipySparseMat(2,3,
                data=([1,2,3,0,4],([0,0,1,1,1],[0,2,0,1,2])))

    def test_convertVectorToDense(self):
        """Properly converts ScipySparseMat vectors to dense numpy repr."""
        exp = array([1, 0, 3])
        obs = ScipySparseMat.convertVectorToDense(self.row_vec)
        self.assertEqual(obs, exp)

        exp = array([1, 0, 3])
        obs = ScipySparseMat.convertVectorToDense(self.col_vec)
        self.assertEqual(obs, exp)

        exp = array([42])
        obs = ScipySparseMat.convertVectorToDense(self.single_ele)
        self.assertEqual(obs, exp)

    def test_is_empty(self):
        """Differentiate empty matrix from non-empty matrix."""
        # NOTE(review): the loop body was missing in this copy. The
        # assertions below assume ``is_empty`` is a property (like shape,
        # dtype, fmt and size in the other tests) that is true only for the
        # 0x0/0xn/nx0 cases -- confirm against ScipySparseMat.
        for m in self.nulls:
            self.assertTrue(m.is_empty)

        self.assertFalse(self.mat1.is_empty)

    def test_shape(self):
        """What kind of shape are you in?"""
        self.assertEqual(self.null1.shape, (0,0))
        self.assertEqual(self.null2.shape, (0,42))
        self.assertEqual(self.null3.shape, (42,0))
        self.assertEqual(self.mat1.shape, (2,3))
        self.assertEqual(self.empty.shape, (2,2))
        self.assertEqual(self.row_vec.shape, (1,3))
        self.assertEqual(self.col_vec.shape, (3,1))
        self.assertEqual(self.single_ele.shape, (1,1))

    def test_dtype(self):
        """What's your type?"""
        for m in self.nulls:
            self.assertEqual(m.dtype, None)

        self.assertEqual(self.empty.dtype, float)
        self.assertEqual(self.row_vec.dtype, float)

    def test_fmt(self):
        """What format are you in?"""
        for m in self.nulls:
            self.assertEqual(m.fmt, None)

        self.assertEqual(self.empty.fmt, 'coo')
        self.assertEqual(self.mat1.fmt, 'csr')
        self.assertEqual(self.single_ele.fmt, 'lil')

    def test_size(self):
        """What is your NNZ?"""
        for m in self.nulls:
            self.assertEqual(m.size, 0)

        self.assertEqual(self.empty.size, 0)
        self.assertEqual(self.single_ele.size, 1)
        self.assertEqual(self.mat1.size, 4)
        self.assertEqual(self.explicit_zeros.size, 4)

    def test_convert(self):
        """Test sparse format conversion."""
        # NOTE(review): the conversion calls were missing in this copy, which
        # left two contradictory fmt assertions back to back. ``convert(fmt)``
        # is assumed to be the in-place conversion method -- confirm.
        self.assertEqual(self.mat1.fmt, 'csr')
        self.mat1.convert('coo')
        self.assertEqual(self.mat1.fmt, 'coo')

        # Null matrices are expected to report no format before and after.
        for m in self.nulls:
            self.assertEqual(m.fmt, None)
            m.convert('coo')
            self.assertEqual(m.fmt, None)

    def test_transpose(self):
        """Test transposition."""
        obs = self.null1.T
        self.assertEqual(obs, self.null1)
        self.assertFalse(obs is self.null1)

        obs = self.null2.T
        self.assertEqual(obs, self.null3)
        self.assertFalse(obs is self.null3)

        obs = self.null3.T
        self.assertEqual(obs, self.null2)
        self.assertFalse(obs is self.null2)

        obs = self.single_ele.T
        self.assertEqual(obs, self.single_ele)
        self.assertFalse(obs is self.single_ele)

        exp = ScipySparseMat(3,2,data=array([[1,3],[0,0],[2,4]]))
        obs = self.mat1.T
        self.assertEqual(obs, exp)

    def test_sum(self):
        """Test summing a matrix."""
        for m in self.nulls:
            self.assertEqual(m.sum(), 0)

        self.assertEqual(self.mat1.sum(), 10)
        self.assertEqual(self.mat1.sum(0), array([4,0,6]))
        self.assertEqual(self.mat1.sum(1), array([3,7]))
        self.assertEqual(self.row_vec.sum(1), array([4]))
        self.assertEqual(self.col_vec.sum(0), array([4]))
        # An axis other than 0 or 1 is rejected.
        with self.assertRaises(ValueError):
            _ = self.mat1.sum(3)

    def test_getRow(self):
        """Test grabbing a row from the matrix."""
        for m in self.nulls:
            with self.assertRaises(IndexError):
                _ = m.getRow(0)

        exp = ScipySparseMat(1,3,data=array([[1,0,2]]))
        obs = self.mat1.getRow(0)
        self.assertEqual(obs, exp)

        self.assertEqual(self.row_vec.getRow(0), self.row_vec)

    def test_getCol(self):
        """Test grabbing a column from the matrix."""
        for m in self.nulls:
            with self.assertRaises(IndexError):
                _ = m.getCol(0)

        exp = ScipySparseMat(2,1,data=array([[1],[3]]))
        obs = self.mat1.getCol(0)
        self.assertEqual(obs, exp)

        self.assertEqual(self.col_vec.getCol(0), self.col_vec)

    def test_items_iteritems(self):
        """Test getting a list of non-zero elements."""
        exp = []
        for m in self.nulls + [self.empty]:
            self.assertEqual(m.items(), exp)
            self.assertEqual(list(m.iteritems()), exp)

        exp = [((0,0),1),((0,2),2),((1,0),3),((1,2),4)]
        self.assertEqual(sorted(self.mat1.items()), exp)
        self.assertEqual(sorted(self.mat1.iteritems()), exp)

    def test_copy(self):
        """Test copying the matrix."""
        for m in self.nulls:
            copy = m.copy()
            self.assertEqual(copy, m)
            self.assertFalse(copy is m)

        copy = self.mat1.copy()
        self.assertEqual(copy, self.mat1)
        self.assertFalse(copy is self.mat1)

        # Changing the copy must not affect the original.
        copy[1,1] = 42
        self.assertNotEqual(copy, self.mat1)

    def test_eq(self):
        """Test whether two matrices are equal."""
        self.assertTrue(self.null1 == ScipySparseMat(0,0))
        self.assertTrue(self.null2 == ScipySparseMat(0,42))
        self.assertTrue(self.null3 == ScipySparseMat(42,0))
        self.assertTrue(self.empty == ScipySparseMat(2,2))

        mat2 = ScipySparseMat(2,3,data=array([[1,0,2],[3,0,4]]))
        self.assertTrue(self.mat1 == mat2)

        # Sparse format shouldn't matter.
        # NOTE(review): mat2 is built exactly like mat1, so without a
        # conversion the formats cannot differ; the call below restores the
        # step that appears to be missing -- confirm the target format.
        mat2.convert('lil')
        self.assertNotEqual(self.mat1.fmt, mat2.fmt)
        self.assertTrue(self.mat1 == mat2)

        # Equality works in both directions.
        self.assertTrue(mat2 == self.mat1)

    def test_ne(self):
        """Test whether two matrices are not equal."""
        # Wrong type.
        self.assertTrue(self.null1 != array([]))

        # Wrong shape.
        self.assertTrue(self.null2 != self.null3)
        self.assertTrue(self.empty != ScipySparseMat(2,1))

        # Wrong dtype.
        self.assertTrue(self.empty != ScipySparseMat(2,2,dtype=int))

        # Wrong size.
        wrong_size = ScipySparseMat(2,2)
        self.assertTrue(self.empty == wrong_size)
        wrong_size[1,0] = 42
        self.assertTrue(self.empty != wrong_size)

        # Wrong data.
        wrong_data = self.mat1.copy()
        self.assertTrue(self.mat1 == wrong_data)
        wrong_data[0,2] = 42
        self.assertTrue(self.mat1 != wrong_data)
        self.assertTrue(wrong_data != self.mat1)

    def test_str(self):
        """Test getting string representation of the matrix."""
        for m in self.nulls:
            self.assertEqual(str(m), '<%dx%d empty/null sparse matrix>' %
                             (m.shape[0], m.shape[1]))

    def test_setitem(self):
        """Test setting an element in the matrix."""
        for m in self.nulls:
            with self.assertRaises(IndexError):
                m[0,0] = 42

        # A single (non-tuple) index is rejected.
        with self.assertRaises(IndexError):
            self.empty[0] = [42,42]

        # Overwriting an existing nonzero element with zero is rejected.
        with self.assertRaises(ValueError):
            self.mat1[0,0] = 0

        with self.assertRaises(ValueError):
            self.mat1[0,0] = 0.0

        # Setting existing zero element doesn't change matrix.
        copy = self.mat1.copy()
        copy[0,1] = 0.0
        self.assertEqual(copy, self.mat1)

        # Setting existing nonzero element to the same thing doesn't change
        # matrix.
        copy = self.mat1.copy()
        copy[0,0] = 1.0
        self.assertEqual(copy, self.mat1)

        # nonzero element -> nonzero element
        copy = self.mat1.copy()
        copy[0,0] = 42
        self.assertNotEqual(copy, self.mat1)
        self.assertEqual(copy[0,0], 42)

        # zero element -> nonzero element
        copy = self.mat1.copy()
        copy[0,1] = 42
        self.assertNotEqual(copy, self.mat1)
        self.assertEqual(copy[0,1], 42)

    def test_getitem(self):
        """Test getting an element from the matrix."""
        for m in self.nulls:
            with self.assertRaises(IndexError):
                _ = m[0,0]

        # Single indices, full slices and partial slices are all rejected.
        with self.assertRaises(IndexError):
            _ = self.empty[0]

        with self.assertRaises(IndexError):
            _ = self.empty[:,:]

        with self.assertRaises(IndexError):
            _ = self.empty[0:1,0]

        with self.assertRaises(IndexError):
            _ = self.empty[0,0:1]

        exp = ScipySparseMat(2,1)
        obs = self.empty[:,0]
        self.assertEqual(obs, exp)

        # Extracting a column.
        obs = self.mat1[:,2]
        self.assertEqual(obs, self.mat1.getCol(2))

        # Extracting a row.
        obs = self.mat1[1,:]
        self.assertEqual(obs, self.mat1.getRow(1))

        # Extracting a single element.
        self.assertEqual(self.empty[1,1], 0)
        self.assertEqual(self.mat1[1,2], 4)

        # Column index past the last column.
        with self.assertRaises(IndexError):
            _ = self.mat1[1,3]