Beispiel #1
0
    def test_append_matrix(self):
        """Exercise append_matrix on an HDF5 dataset and on numpy arrays.

        The fixture file is copied to a temporary file and the copy is
        opened in 'r+' mode.  The test expects append_matrix to extend its
        first argument in place with the rows of its second argument, for
        three combinations: dataset + array, dataset + itself (no assertion
        follows that call directly) and array + array.
        """
        source_path = join(TEST_DATA_DIR, '1000snps.hdf5')
        extra_rows = [[1, 1, 1], [2, 2, 2]]
        array = numpy.array(extra_rows)

        # Expected contents once the two extra rows follow the stored ones.
        stored_rows = [[1, 8, 5], [3, 5, 3], [6, 0, 4], [7, 4, 2], [4, 2, 3]]
        expected = stored_rows + extra_rows
        # array2 starts as the two extra rows and then receives the dataset
        # after the dataset was appended to itself, i.e. `expected` twice.
        expected2 = extra_rows + expected + expected

        with NamedTemporaryFile(suffix='.h5') as tmp_file:
            shutil.copy(source_path, tmp_file.name)
            hdf5 = VariationsH5(tmp_file.name, mode='r+')
            dset = hdf5['/calls/DP']
            # In-memory snapshot taken before anything is appended.
            orig_array = dset[()]

            append_matrix(dset, array)
            assert numpy.all(dset[()] == expected)

            # Appending the dataset to itself; checked through array2 below.
            append_matrix(dset, dset)

            array2 = numpy.array(extra_rows)
            append_matrix(array2, dset[()])
            assert numpy.all(expected2 == array2)

        # The snapshot is a plain array, so it is usable after the file
        # context has been left.
        append_matrix(orig_array, array)
        assert numpy.all(orig_array == expected)
Beispiel #2
0
    def _get_mats_for_chunk(self, variations):
        field_paths = variations.keys()
        diff_fields = set(self.keys()).difference(set(field_paths))

        if diff_fields:
            msg = 'Previous matrices do not match matrices in chunk'
            raise ValueError(msg)

        matrices = {}

        for field in field_paths:
            mat1 = variations[field]
            mat2 = self[field]
            self._check_shape_matches(mat1, mat2, field)
            append_matrix(mat2, mat1)
            matrices[field] = mat2
        return matrices
Beispiel #3
0
    def put_chunks(self, chunks, kept_fields=None, ignored_fields=None):
        """Store a series of chunks, appending each one after the first.

        While no matrices have been obtained yet, the current chunk is
        handed to ``self._create_or_get_mats_from_chunk`` and its result is
        used as the destination matrices.  Every later chunk is appended
        field by field to those matrices with ``append_matrix``.

        Args:
            chunks: iterable of mapping-like chunks, or None.  None makes
                the method return immediately without touching any state.
            kept_fields: accepted but not used in this method.
            ignored_fields: accepted but not used in this method.

        Raises:
            AssertionError: if the fields of an appended chunk do not all
                have the same first-dimension length.
        """
        if chunks is None:
            return

        dest_mats = None
        for chunk in chunks:
            if dest_mats is None:
                dest_mats = self._create_or_get_mats_from_chunk(chunk)
                continue

            # Every field of the chunk must hold the same number of snps.
            row_counts = [chunk[field].data.shape[0]
                          for field in chunk.keys()]
            reference_count = row_counts[0]
            assert all(reference_count == count for count in row_counts)

            for field in chunk.keys():
                incoming = chunk[field]
                append_matrix(dest_mats[field], incoming)

        # Drop the cached index now that chunks have been processed.
        self._index = None

        # NOTE(review): the guard looks for a `flush` attribute but the call
        # goes to `self._h5file.flush()` -- confirm both always coexist.
        if hasattr(self, 'flush'):
            self._h5file.flush()
Beispiel #4
0
    def test_append_matrix(self):
        """Exercise append_matrix on an HDF5 dataset and on numpy arrays.

        The fixture file is copied to a temporary file and the copy is
        opened in 'r+' mode.  The test expects append_matrix to extend its
        first argument in place with the rows of its second argument.

        Dataset contents are read with ``dset[()]`` instead of the
        ``.value`` attribute: h5py deprecated ``Dataset.value`` and removed
        it in 3.0 (this assumes VariationsH5 hands back h5py datasets --
        confirm against its implementation).
        """
        in_fpath = join(TEST_DATA_DIR, '1000snps.hdf5')
        array = numpy.array([[1, 1, 1], [2, 2, 2]])
        # Stored rows followed by the two appended rows.
        expected = ([[1, 8, 5],
                     [3, 5, 3],
                     [6, 0, 4],
                     [7, 4, 2],
                     [4, 2, 3],
                     [1, 1, 1],
                     [2, 2, 2]])
        # The two rows of array2 followed by `expected` twice, because the
        # dataset is appended to itself before being appended to array2.
        expected2 = [[1, 1, 1],
                     [2, 2, 2],
                     [1, 8, 5],
                     [3, 5, 3],
                     [6, 0, 4],
                     [7, 4, 2],
                     [4, 2, 3],
                     [1, 1, 1],
                     [2, 2, 2],
                     [1, 8, 5],
                     [3, 5, 3],
                     [6, 0, 4],
                     [7, 4, 2],
                     [4, 2, 3],
                     [1, 1, 1],
                     [2, 2, 2]]
        with NamedTemporaryFile(suffix='.h5') as fhand_out:
            shutil.copy(in_fpath, fhand_out.name)
            hdf5 = VariationsH5(fhand_out.name, mode='r+')
            dset = hdf5['/calls/DP']
            # In-memory snapshot taken before anything is appended.
            orig_array = dset[()]
            append_matrix(dset, array)
            assert numpy.all(dset[()] == expected)

            # Appending the dataset to itself; checked through array2 below.
            append_matrix(dset, dset)

            array2 = numpy.array([[1, 1, 1], [2, 2, 2]])
            append_matrix(array2, dset[()])
            assert numpy.all(expected2 == array2)

        # The snapshot is a plain array, so it is usable after the file
        # context has been left.
        append_matrix(orig_array, array)
        assert numpy.all(orig_array == expected)