예제 #1
0
    def test_view(self):
        # Viewing an ndarray as HaplotypeArray: unsuitable data is expected to
        # raise TypeError, valid data must expose the haplotype attributes.

        invalid_inputs = [
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions (use GenotypeArray instead)
            diploid_genotype_data,
        ]
        for data in invalid_inputs:
            with self.assertRaises(TypeError):
                np.array(data).view(HaplotypeArray)

        # haploid data
        h = np.array(haplotype_data).view(HaplotypeArray)
        aeq(haplotype_data, h)
        # np.int was only an alias for the builtin int and is no longer
        # available in current NumPy; np.dtype(int) names the same dtype
        eq(np.dtype(int), h.dtype)
        eq(2, h.ndim)
        eq(4, h.n_variants)
        eq(3, h.n_haplotypes)
예제 #2
0
    def test_constructor(self):
        # AlleleCountsDaskArray.from_array: a missing argument is expected to
        # raise TypeError, unsuitable data ValueError, and typed allele counts
        # data must round-trip with its dtype preserved.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            AlleleCountsDaskArray.from_array()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(ValueError):
                AlleleCountsDaskArray.from_array(bad_input)

        # valid data (typed)
        typed_counts = np.array(allele_counts_data, dtype='u2')
        instance = self.setup_instance(typed_counts)
        aeq(allele_counts_data, instance)
        eq(np.uint16, instance.dtype)
예제 #3
0
    def test_to_hdf5_group(self):
        # Write a variant table with to_hdf5_group, addressing the target
        # first by file path and then by an open h5py file, and check that
        # every column can be read back unchanged.
        import os

        # setup HDF5 file
        node_path = 'test'
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            a = np.rec.array(variant_table_data, dtype=variant_table_dtype)
            # reorder columns because will come back out in sorted order
            a = a[sorted(a.dtype.names)]
            vt = self.setup_instance(a)

            def check_written():
                # the group must hold exactly one dataset per column, each
                # equal to the corresponding column of the source array
                with h5py.File(file_path, mode='r') as h5f:
                    h5g = h5f[node_path]
                    eq(sorted(a.dtype.names), sorted(h5g.keys()))
                    for n in a.dtype.names:
                        aeq(a[n], h5g[n][:])

            # write using file path and node path
            vt.to_hdf5_group(file_path, node_path)
            check_written()

            # write using group and node path
            with h5py.File(file_path, mode='w') as h5f:
                vt.to_hdf5_group(h5f, node_path)
            check_written()
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #4
0
    def test_from_hdf5_group(self):
        # Store each column of a variant table as a dataset in an HDF5 group,
        # then load the group back with from_hdf5_group, addressing it first
        # by file path and then by an open h5py group.
        import os

        # setup HDF5 file
        node_path = 'test'
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            a = np.rec.array(variant_table_data, dtype=variant_table_dtype)
            # reorder columns because will come back out in sorted order
            a = a[sorted(a.dtype.names)]
            with h5py.File(file_path, mode='w') as h5f:
                h5g = h5f.create_group(node_path)
                for n in a.dtype.names:
                    h5g.create_dataset(n,
                                       data=a[n],
                                       chunks=True,
                                       compression='gzip')

            # file and node path
            vt = self._class.from_hdf5_group(file_path, node_path)
            self.assertIsInstance(vt, self._class)
            aeq(a, vt[:])

            # dataset
            with h5py.File(file_path, mode='r') as h5f:
                h5g = h5f[node_path]
                vt = self._class.from_hdf5_group(h5g)
                self.assertIsInstance(vt, self._class)
                aeq(a, vt[:])
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #5
0
    def test_mean_pairwise_diversity(self):
        # Compare allel.mean_pairwise_difference with hand-computed values.
        # In this data, rows with fewer than two non-negative calls are
        # expected to yield the fill value (-1).

        # start with simplest case, two haplotypes, one pairwise comparison
        two_haplotypes = HaplotypeArray(
            [[0, 0], [1, 1], [0, 1], [1, 2], [0, -1], [-1, -1]])
        observed = allel.mean_pairwise_difference(
            two_haplotypes.count_alleles(), fill=-1)
        aeq([0, 0, 1, 1, -1, -1], observed)

        # four haplotypes, 6 pairwise comparison
        four_haplotypes = HaplotypeArray(
            [[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 1],
             [0, 1, 1, 1], [1, 1, 1, 1], [0, 0, 1, 2],
             [0, 1, 1, 2], [0, 1, -1, -1], [-1, -1, -1, -1]])
        observed = allel.mean_pairwise_difference(
            four_haplotypes.count_alleles(), fill=-1)
        # expected values are fractions, so compare approximately
        assert_array_almost_equal(
            [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1], observed)
예제 #6
0
    def test_to_hdf5(self):
        # Write a GenotypeCArray with to_hdf5, addressing the target first by
        # file path and then by an open h5py file, and check that the stored
        # dataset equals the array contents.
        import os

        # setup HDF5 file
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            # setup genotype array
            node_path = 'test'
            g = GenotypeCArray(diploid_genotype_data, dtype='i1')

            # write using file path and node path
            g.to_hdf5(file_path, node_path)

            # test outcome
            with h5py.File(file_path, mode='r') as h5f:
                h5d = h5f[node_path]
                aeq(g[:], h5d[:])

            # write using group
            with h5py.File(file_path, mode='w') as h5f:
                g.to_hdf5(h5f, node_path)

            # test outcome
            with h5py.File(file_path, mode='r') as h5f:
                h5d = h5f[node_path]
                aeq(g[:], h5d[:])
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #7
0
    def test_constructor(self):
        # HaplotypeArray construction: missing or unsuitable input is expected
        # to raise TypeError; typed haplotype data must keep its dtype.

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            HaplotypeArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions (use GenotypeArray instead)
            diploid_genotype_data,
        )
        for bad_input in rejected:
            with self.assertRaises(TypeError):
                HaplotypeArray(bad_input)

        # haploid data (typed)
        haplotypes = HaplotypeArray(haplotype_data, dtype='i1')
        aeq(haplotype_data, haplotypes)
        eq(np.int8, haplotypes.dtype)
예제 #8
0
    def test_constructor(self):
        # AlleleCountsChunkedArray construction: missing or unsuitable input is
        # expected to raise TypeError; typed data must keep its dtype.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            AlleleCountsChunkedArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(TypeError):
                AlleleCountsChunkedArray(bad_input)

        # valid data (typed)
        typed_counts = np.array(allele_counts_data, dtype='u1')
        counts = AlleleCountsChunkedArray(typed_counts)
        aeq(allele_counts_data, counts)
        eq(np.uint8, counts.dtype)
예제 #9
0
    def test_constructor(self):
        # AlleleCountsDaskArray.from_array: a missing argument is expected to
        # raise TypeError, unsuitable data ValueError, and typed allele counts
        # data must round-trip with its dtype preserved.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            AlleleCountsDaskArray.from_array()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(ValueError):
                AlleleCountsDaskArray.from_array(bad_input)

        # valid data (typed)
        typed_counts = np.array(allele_counts_data, dtype='u2')
        instance = self.setup_instance(typed_counts)
        aeq(allele_counts_data, instance)
        eq(np.uint16, instance.dtype)
예제 #10
0
    def test_from_hdf5_condition(self):
        # Load a GenotypeCArray from HDF5 with a boolean condition, addressing
        # the source first by file path and then by an open dataset; the
        # result must equal compressing the in-memory data along axis 0.
        import os

        # setup HDF5 file
        node_path = 'test'
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            with h5py.File(file_path, mode='w') as h5f:
                h5f.create_dataset(node_path,
                                   data=diploid_genotype_data,
                                   chunks=(2, 3, 2))

            # selection
            condition = [False, True, False, True, False]

            # file and node path
            g = GenotypeCArray.from_hdf5(file_path, node_path,
                                         condition=condition)
            expect = GenotypeArray(diploid_genotype_data).compress(
                condition, axis=0)
            aeq(expect, g)

            # dataset
            with h5py.File(file_path, mode='r') as h5f:
                dataset = h5f[node_path]
                g = GenotypeCArray.from_hdf5(dataset, condition=condition)
                aeq(expect, g)
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #11
0
    def test_constructor(self):
        # AlleleCountsArray construction: missing or unsuitable input is
        # expected to raise TypeError; typed data must keep its dtype.

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            AlleleCountsArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions
            diploid_genotype_data,
        )
        for bad_input in rejected:
            with self.assertRaises(TypeError):
                AlleleCountsArray(bad_input)

        # valid data (typed)
        counts = AlleleCountsArray(allele_counts_data, dtype='u1')
        aeq(allele_counts_data, counts)
        eq(np.uint8, counts.dtype)
예제 #12
0
    def test_to_hdf5(self):
        # Write a GenotypeCArray with to_hdf5, addressing the target first by
        # file path and then by an open h5py file, and check that the stored
        # dataset equals the array contents.
        import os

        # setup HDF5 file
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            # setup genotype array
            node_path = 'test'
            g = GenotypeCArray(diploid_genotype_data, dtype='i1')

            # write using file path and node path
            g.to_hdf5(file_path, node_path)

            # test outcome
            with h5py.File(file_path, mode='r') as h5f:
                h5d = h5f[node_path]
                aeq(g[:], h5d[:])

            # write using group
            with h5py.File(file_path, mode='w') as h5f:
                g.to_hdf5(h5f, node_path)

            # test outcome
            with h5py.File(file_path, mode='r') as h5f:
                h5d = h5f[node_path]
                aeq(g[:], h5d[:])
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #13
0
    def test_constructor(self):
        # HaplotypeChunkedArray construction: missing or unsuitable input is
        # expected to raise TypeError; typed data must keep its dtype.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            HaplotypeChunkedArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions (use GenotypeCArray instead)
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(TypeError):
                HaplotypeChunkedArray(bad_input)

        # valid data (typed)
        typed_haplotypes = np.array(haplotype_data, dtype='i1')
        haplotypes = HaplotypeChunkedArray(typed_haplotypes)
        aeq(haplotype_data, haplotypes)
        eq(np.int8, haplotypes.dtype)
예제 #14
0
    def test_constructor(self):
        # HaplotypeDaskArray.from_array: a missing argument is expected to
        # raise TypeError, unsuitable data ValueError, and typed haplotype
        # data must round-trip with its dtype preserved.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            HaplotypeDaskArray.from_array()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions (use GenotypeDaskArray instead)
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(ValueError):
                HaplotypeDaskArray.from_array(bad_input)

        # valid data (typed)
        typed_haplotypes = np.array(haplotype_data, dtype='i1')
        instance = self.setup_instance(typed_haplotypes)
        aeq(haplotype_data, instance)
        eq(np.int8, instance.dtype)
예제 #15
0
    def test_from_hdf5_group(self):
        # Store each column of a variant table as a dataset in an HDF5 group,
        # then load the group back with from_hdf5_group, addressing it first
        # by file path and then by an open h5py group.
        import os

        # setup HDF5 file
        node_path = 'test'
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            a = np.rec.array(variant_table_data, dtype=variant_table_dtype)
            # reorder columns because will come back out in sorted order
            a = a[sorted(a.dtype.names)]
            with h5py.File(file_path, mode='w') as h5f:
                h5g = h5f.create_group(node_path)
                for n in a.dtype.names:
                    h5g.create_dataset(n, data=a[n], chunks=True,
                                       compression='gzip')

            # file and node path
            vt = self._class.from_hdf5_group(file_path, node_path)
            self.assertIsInstance(vt, self._class)
            aeq(a, vt[:])

            # dataset
            with h5py.File(file_path, mode='r') as h5f:
                h5g = h5f[node_path]
                vt = self._class.from_hdf5_group(h5g)
                self.assertIsInstance(vt, self._class)
                aeq(a, vt[:])
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #16
0
    def test_constructor(self):
        # HaplotypeArray construction: missing or unsuitable input is expected
        # to raise TypeError; typed haplotype data must keep its dtype.

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            HaplotypeArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions (use GenotypeArray instead)
            diploid_genotype_data,
        )
        for bad_input in rejected:
            with self.assertRaises(TypeError):
                HaplotypeArray(bad_input)

        # haploid data (typed)
        haplotypes = HaplotypeArray(haplotype_data, dtype='i1')
        aeq(haplotype_data, haplotypes)
        eq(np.int8, haplotypes.dtype)
예제 #17
0
    def test_heterozygosity_observed(self):
        # Check allel.stats.heterozygosity_observed on two samples per variant,
        # once with two alleles per call and once with three. In this data the
        # variant whose calls are all -1 is expected to yield the fill value.

        # the same per-variant result is expected for both inputs
        expected = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]

        # diploid
        diploid = GenotypeArray(
            [[[0, 0], [0, 0]], [[1, 1], [1, 1]], [[1, 1], [2, 2]],
             [[0, 0], [0, 1]], [[0, 0], [0, 2]], [[1, 1], [1, 2]],
             [[0, 1], [0, 1]], [[0, 1], [1, 2]], [[0, 0], [-1, -1]],
             [[0, 1], [-1, -1]], [[-1, -1], [-1, -1]]],
            dtype='i1')
        aeq(expected, allel.stats.heterozygosity_observed(diploid, fill=-1))

        # polyploid
        polyploid = GenotypeArray(
            [[[0, 0, 0], [0, 0, 0]], [[1, 1, 1], [1, 1, 1]],
             [[1, 1, 1], [2, 2, 2]], [[0, 0, 0], [0, 0, 1]],
             [[0, 0, 0], [0, 0, 2]], [[1, 1, 1], [0, 1, 2]],
             [[0, 0, 1], [0, 1, 1]], [[0, 1, 1], [0, 1, 2]],
             [[0, 0, 0], [-1, -1, -1]], [[0, 0, 1], [-1, -1, -1]],
             [[-1, -1, -1], [-1, -1, -1]]],
            dtype='i1')
        aeq(expected, allel.stats.heterozygosity_observed(polyploid, fill=-1))
예제 #18
0
    def test_view(self):
        # Viewing an ndarray as HaplotypeArray: unsuitable data is expected to
        # raise TypeError, valid data must expose the haplotype attributes.

        invalid_inputs = [
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions (use GenotypeArray instead)
            diploid_genotype_data,
        ]
        for data in invalid_inputs:
            with self.assertRaises(TypeError):
                np.array(data).view(HaplotypeArray)

        # haploid data
        h = np.array(haplotype_data).view(HaplotypeArray)
        aeq(haplotype_data, h)
        # np.int was only an alias for the builtin int and is no longer
        # available in current NumPy; np.dtype(int) names the same dtype
        eq(np.dtype(int), h.dtype)
        eq(2, h.ndim)
        eq(4, h.n_variants)
        eq(3, h.n_haplotypes)
예제 #19
0
    def test_constructor(self):
        # AlleleCountsArray construction: missing or unsuitable input is
        # expected to raise TypeError; typed data must keep its dtype.

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            AlleleCountsArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions
            diploid_genotype_data,
        )
        for bad_input in rejected:
            with self.assertRaises(TypeError):
                AlleleCountsArray(bad_input)

        # valid data (typed)
        counts = AlleleCountsArray(allele_counts_data, dtype='u1')
        aeq(allele_counts_data, counts)
        eq(np.uint8, counts.dtype)
예제 #20
0
    def test_view(self):
        # Viewing an ndarray as AlleleCountsArray: unsuitable data is expected
        # to raise TypeError, valid data must expose the allele counts
        # attributes.

        invalid_inputs = [
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions
            diploid_genotype_data,
        ]
        for data in invalid_inputs:
            with self.assertRaises(TypeError):
                np.array(data).view(AlleleCountsArray)

        # valid data
        ac = np.array(allele_counts_data).view(AlleleCountsArray)
        aeq(allele_counts_data, ac)
        # np.int was only an alias for the builtin int and is no longer
        # available in current NumPy; np.dtype(int) names the same dtype
        eq(np.dtype(int), ac.dtype)
        eq(2, ac.ndim)
        eq(6, ac.n_variants)
        eq(3, ac.n_alleles)
예제 #21
0
    def test_mean_pairwise_diversity(self):
        # Compare allel.stats.mean_pairwise_difference with hand-computed
        # values. In this data, rows with fewer than two non-negative calls
        # are expected to yield the fill value (-1).

        # start with simplest case, two haplotypes, one pairwise comparison
        two_haplotypes = HaplotypeArray(
            [[0, 0], [1, 1], [0, 1], [1, 2], [0, -1], [-1, -1]])
        observed = allel.stats.mean_pairwise_difference(
            two_haplotypes.count_alleles(), fill=-1)
        aeq([0, 0, 1, 1, -1, -1], observed)

        # four haplotypes, 6 pairwise comparison
        four_haplotypes = HaplotypeArray(
            [[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 1],
             [0, 1, 1, 1], [1, 1, 1, 1], [0, 0, 1, 2],
             [0, 1, 1, 2], [0, 1, -1, -1], [-1, -1, -1, -1]])
        observed = allel.stats.mean_pairwise_difference(
            four_haplotypes.count_alleles(), fill=-1)
        # expected values are fractions, so compare approximately
        assert_array_close([0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1], observed)
예제 #22
0
    def test_constructor(self):
        # UniqueIndex construction: a missing argument or nested data is
        # expected to raise TypeError, repeated labels ValueError, and valid
        # labels must be preserved.

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            UniqueIndex()

        # data has wrong dimensions
        nested = [['A', 'C'], ['B', 'F']]
        with self.assertRaises(TypeError):
            UniqueIndex(nested)

        # labels are not unique
        repeated = ['A', 'B', 'D', 'B']
        with self.assertRaises(ValueError):
            UniqueIndex(repeated)

        # valid data
        labels = ['A', 'C', 'B', 'F']
        index = UniqueIndex(labels)
        aeq(labels, index)
        eq(1, index.ndim)
        eq(4, len(index))

        # valid data (typed)
        typed_labels = np.array(['A', 'C', 'B', 'F'], dtype='S1')
        typed_index = UniqueIndex(typed_labels, dtype='S1')
        aeq(typed_labels, typed_index)
예제 #23
0
    def test_view(self):
        # Viewing an ndarray as AlleleCountsArray: unsuitable data is expected
        # to raise TypeError, valid data must expose the allele counts
        # attributes.

        invalid_inputs = [
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            [4., 5., 3.7],
            # data has wrong dimensions
            [1, 2, 3],
            # data has wrong dimensions
            diploid_genotype_data,
        ]
        for data in invalid_inputs:
            with self.assertRaises(TypeError):
                np.array(data).view(AlleleCountsArray)

        # valid data
        ac = np.array(allele_counts_data).view(AlleleCountsArray)
        aeq(allele_counts_data, ac)
        # np.int was only an alias for the builtin int and is no longer
        # available in current NumPy; np.dtype(int) names the same dtype
        eq(np.dtype(int), ac.dtype)
        eq(2, ac.ndim)
        eq(5, ac.n_variants)
        eq(3, ac.n_alleles)
예제 #24
0
    def test_from_hdf5_condition(self):
        # Load a GenotypeCArray from HDF5 with a boolean condition, addressing
        # the source first by file path and then by an open dataset; the
        # result must equal compressing the in-memory data along axis 0.
        import os

        # setup HDF5 file
        node_path = 'test'
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            with h5py.File(file_path, mode='w') as h5f:
                h5f.create_dataset(node_path,
                                   data=diploid_genotype_data,
                                   chunks=(2, 3, 2))

            # selection
            condition = [False, True, False, True, False]

            # file and node path
            g = GenotypeCArray.from_hdf5(file_path, node_path,
                                         condition=condition)
            expect = GenotypeArray(diploid_genotype_data).compress(
                condition, axis=0)
            aeq(expect, g)

            # dataset
            with h5py.File(file_path, mode='r') as h5f:
                dataset = h5f[node_path]
                g = GenotypeCArray.from_hdf5(dataset, condition=condition)
                aeq(expect, g)
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #25
0
    def test_constructor(self):
        # UniqueIndex construction: a missing argument or nested data is
        # expected to raise TypeError, repeated labels ValueError, and valid
        # labels must be preserved.

        # missing data arg
        with pytest.raises(TypeError):
            # noinspection PyArgumentList
            UniqueIndex()

        # data has wrong dimensions
        nested = [['A', 'C'], ['B', 'F']]
        with pytest.raises(TypeError):
            UniqueIndex(nested)

        # labels are not unique
        repeated = ['A', 'B', 'D', 'B']
        with pytest.raises(ValueError):
            UniqueIndex(repeated)

        # valid data
        labels = ['A', 'C', 'B', 'F']
        index = UniqueIndex(labels)
        aeq(labels, index)
        assert 1 == index.ndim
        assert 4 == len(index)

        # valid data (typed)
        typed_labels = np.array(['A', 'C', 'B', 'F'], dtype='S1')
        typed_index = UniqueIndex(typed_labels, dtype='S1')
        aeq(typed_labels, typed_index)
예제 #26
0
    def test_heterozygosity_observed(self):
        # Check allel.heterozygosity_observed on two samples per variant, once
        # with two alleles per call and once with three. In this data the
        # variant whose calls are all -1 is expected to yield the fill value.

        # the same per-variant result is expected for both inputs
        expected = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]

        # diploid
        diploid = GenotypeArray([[[0, 0], [0, 0]],
                                 [[1, 1], [1, 1]],
                                 [[1, 1], [2, 2]],
                                 [[0, 0], [0, 1]],
                                 [[0, 0], [0, 2]],
                                 [[1, 1], [1, 2]],
                                 [[0, 1], [0, 1]],
                                 [[0, 1], [1, 2]],
                                 [[0, 0], [-1, -1]],
                                 [[0, 1], [-1, -1]],
                                 [[-1, -1], [-1, -1]]], dtype='i1')
        aeq(expected, allel.heterozygosity_observed(diploid, fill=-1))

        # polyploid
        polyploid = GenotypeArray([[[0, 0, 0], [0, 0, 0]],
                                   [[1, 1, 1], [1, 1, 1]],
                                   [[1, 1, 1], [2, 2, 2]],
                                   [[0, 0, 0], [0, 0, 1]],
                                   [[0, 0, 0], [0, 0, 2]],
                                   [[1, 1, 1], [0, 1, 2]],
                                   [[0, 0, 1], [0, 1, 1]],
                                   [[0, 1, 1], [0, 1, 2]],
                                   [[0, 0, 0], [-1, -1, -1]],
                                   [[0, 0, 1], [-1, -1, -1]],
                                   [[-1, -1, -1], [-1, -1, -1]]], dtype='i1')
        aeq(expected, allel.heterozygosity_observed(polyploid, fill=-1))
예제 #27
0
    def test_to_hdf5_group(self):
        # Write a variant table with to_hdf5_group, addressing the target
        # first by file path and then by an open h5py file, and check that
        # every column can be read back unchanged.
        import os

        # setup HDF5 file
        node_path = 'test'
        tf = tempfile.NamedTemporaryFile(delete=False)
        file_path = tf.name
        tf.close()

        try:
            a = np.rec.array(variant_table_data, dtype=variant_table_dtype)
            # reorder columns because will come back out in sorted order
            a = a[sorted(a.dtype.names)]
            vt = self.setup_instance(a)

            def check_written():
                # the group must hold exactly one dataset per column, each
                # equal to the corresponding column of the source array
                with h5py.File(file_path, mode='r') as h5f:
                    h5g = h5f[node_path]
                    eq(sorted(a.dtype.names), sorted(h5g.keys()))
                    for n in a.dtype.names:
                        aeq(a[n], h5g[n][:])

            # write using file path and node path
            vt.to_hdf5_group(file_path, node_path)
            check_written()

            # write using group and node path
            with h5py.File(file_path, mode='w') as h5f:
                vt.to_hdf5_group(h5f, node_path)
            check_written()
        finally:
            # the file was created with delete=False, so nothing else removes
            # it; cleanup is best-effort so that a failure to delete cannot
            # mask the outcome of the assertions above
            try:
                os.remove(file_path)
            except OSError:
                pass
예제 #28
0
    def test_constructor(self):
        # HaplotypeDaskArray.from_array: a missing argument is expected to
        # raise TypeError, unsuitable data ValueError, and typed haplotype
        # data must round-trip with its dtype preserved.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            HaplotypeDaskArray.from_array()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions (use GenotypeDaskArray instead)
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(ValueError):
                HaplotypeDaskArray.from_array(bad_input)

        # valid data (typed)
        typed_haplotypes = np.array(haplotype_data, dtype='i1')
        instance = self.setup_instance(typed_haplotypes)
        aeq(haplotype_data, instance)
        eq(np.int8, instance.dtype)
예제 #29
0
    def test_locate_unlinked(self):
        # Check the boolean mask returned by allel.locate_unlinked for small
        # hand-built inputs, using plain lists and a bcolz carray.

        # two identical rows
        pair = [[0, 1, 2], [0, 1, 2]]
        mask = allel.locate_unlinked(pair, size=2, step=2, threshold=.5)
        aeq([True, False], mask)

        # two pairs of identical rows
        two_pairs = [[0, 1, 1, 2], [0, 1, 1, 2], [1, 1, 0, 2], [1, 1, 0, 2]]
        mask = allel.locate_unlinked(two_pairs, size=2, step=1, threshold=.5)
        aeq([True, False, True, False], mask)

        # three identical rows followed by two identical rows
        triple_then_pair = [[0, 1, 1, 2], [0, 1, 1, 2], [0, 1, 1, 2],
                            [1, 1, 0, 2], [1, 1, 0, 2]]
        mask = allel.locate_unlinked(triple_then_pair, size=2, step=1,
                                     threshold=.5)
        aeq([True, False, True, True, False], mask)
        mask = allel.locate_unlinked(triple_then_pair, size=3, step=1,
                                     threshold=.5)
        aeq([True, False, False, True, False], mask)

        # test with bcolz carray
        import bcolz
        compressed = bcolz.carray(triple_then_pair, chunklen=2)
        mask = allel.locate_unlinked(compressed, size=2, step=1,
                                     threshold=.5, blen=2)
        aeq([True, False, True, True, False], mask)
예제 #30
0
    def test_constructor(self):
        # AlleleCountsChunkedArray construction: a missing argument is expected
        # to raise TypeError, unsuitable data ValueError, and typed data must
        # keep its dtype.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            AlleleCountsChunkedArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(ValueError):
                AlleleCountsChunkedArray(bad_input)

        # valid data (typed)
        typed_counts = np.array(allele_counts_data, dtype='u1')
        counts = AlleleCountsChunkedArray(typed_counts)
        aeq(allele_counts_data, counts)
        eq(np.uint8, counts.dtype)
예제 #31
0
    def test_constructor(self):
        # HaplotypeChunkedArray construction: a missing argument is expected
        # to raise TypeError, unsuitable data ValueError, and typed data must
        # keep its dtype.

        # missing data arg
        with assert_raises(TypeError):
            # noinspection PyArgumentList
            HaplotypeChunkedArray()

        rejected = (
            # data has wrong dtype
            'foo bar',
            # data has wrong dtype
            np.array([4., 5., 3.7]),
            # data has wrong dimensions
            np.array([1, 2, 3]),
            # data has wrong dimensions (use GenotypeCArray instead)
            np.array([[[1, 2], [3, 4]]]),
        )
        for bad_input in rejected:
            with assert_raises(ValueError):
                HaplotypeChunkedArray(bad_input)

        # valid data (typed)
        typed_haplotypes = np.array(haplotype_data, dtype='i1')
        haplotypes = HaplotypeChunkedArray(typed_haplotypes)
        aeq(haplotype_data, haplotypes)
        eq(np.int8, haplotypes.dtype)
예제 #32
0
 def test_view(self):
     # A record array viewed as VariantTable must keep its contents and
     # report the expected dimensions, variant count and column names.
     records = np.rec.array(variant_table_data, dtype=variant_table_dtype)
     table = records.view(VariantTable)
     aeq(records, table)
     eq(1, table.ndim)
     eq(5, table.n_variants)
     eq(variant_table_names, table.names)
예제 #33
0
 def test_view(self):
     # A record array viewed as FeatureTable must keep its contents and
     # report the expected dimensions, feature count and column names.
     records = np.rec.array(feature_table_data, dtype=feature_table_dtype)
     table = records.view(FeatureTable)
     aeq(records, table)
     eq(1, table.ndim)
     eq(6, table.n_features)
     eq(feature_table_names, table.names)
예제 #34
0
 def test_take(self):
     # take() along axis 0 on the instance under test must agree with
     # numpy's take on the same data.
     reference = np.array(diploid_genotype_data)
     instance = self.setup_instance(reference)
     # take variants not in original order
     selection = [2, 0]
     aeq(reference.take(selection, axis=0),
         instance.take(selection, axis=0))
예제 #35
0
 def test_view(self):
     # A record array viewed as VariantTable must keep its contents and
     # report the expected dimensions, variant count and column names.
     records = np.rec.array(variant_table_data, dtype=variant_table_dtype)
     table = records.view(VariantTable)
     aeq(records, table)
     eq(1, table.ndim)
     eq(5, table.n_variants)
     eq(variant_table_names, table.names)
예제 #36
0
 def test_view(self):
     # A record array viewed as FeatureTable must keep its contents and
     # report the expected dimensions, feature count and column names.
     records = np.rec.array(feature_table_data, dtype=feature_table_dtype)
     table = records.view(FeatureTable)
     aeq(records, table)
     eq(1, table.ndim)
     eq(6, table.n_features)
     eq(feature_table_names, table.names)
예제 #37
0
 def test_take(self):
     # take() along axis 0 on the instance under test must agree with
     # numpy's take on the same data.
     reference = np.array(diploid_genotype_data)
     instance = self.setup_instance(reference)
     # take variants not in original order
     selection = [2, 0]
     aeq(reference.take(selection, axis=0),
         instance.take(selection, axis=0))
예제 #38
0
 def test_sfs_scaled(self):
     dac = [0, 1, 2, 1]
     expect = [0, 2, 2]

     # The plain list goes first, followed by the same counts converted to
     # each of the integer dtypes under test.
     inputs = [dac]
     inputs.extend(np.asarray(dac, dtype=code)
                   for code in ('u2', 'i2', 'u8', 'i8'))
     for counts in inputs:
         aeq(expect, allel.sfs_scaled(counts))
예제 #39
0
    def test_eval_vm(self):
        # The same expression is evaluated with vm='numexpr' and then
        # vm='python'; both must produce the same boolean result.
        records = np.rec.array(variant_table_data, dtype=variant_table_dtype)
        table = self.setup_instance(records)

        expected = [False, False, True, False, True]
        for backend in ('numexpr', 'python'):
            aeq(expected, table.eval('(DP > 30) & (QD < 4)', vm=backend))
예제 #40
0
 def test_sfs_folded(self):
     ac = [[0, 3], [1, 2], [2, 1]]
     expect = [1, 2]

     # The nested list goes first, followed by the same counts converted to
     # each of the integer dtypes under test.
     inputs = [ac]
     inputs.extend(np.asarray(ac, dtype=code)
                   for code in ('u2', 'i2', 'u8', 'i8'))
     for counts in inputs:
         aeq(expect, allel.sfs_folded(counts))
예제 #41
0
 def test_take(self):
     records = np.rec.array(variant_table_data, dtype=variant_table_dtype)
     table = VariantTable(records)

     # take variants not in original order
     order = [2, 0]
     taken = table.take(order)
     eq(2, taken.n_variants)
     # The table result must match take() on the underlying record array.
     aeq(records.take(order), taken)
예제 #42
0
    def test_eval_vm(self):
        table = self.setup_instance(
            np.rec.array(variant_table_data, dtype=variant_table_dtype))

        # Evaluate one expression under each vm option and compare against
        # the same expected mask.
        query = '(DP > 30) & (QD < 4)'
        mask = [False, False, True, False, True]
        for vm_name in ['numexpr', 'python']:
            result = table.eval(query, vm=vm_name)
            aeq(mask, result)
예제 #43
0
 def test_pdist(self):
     # Local imports, kept inside the test as in the rest of this example.
     from allel.stats.distance import pdist
     import scipy.spatial

     haplotypes = HaplotypeArray([
         [0, 0, 0, 0],
         [0, 0, 0, 1],
         [0, 0, 1, 1],
         [0, 1, 1, 1],
         [1, 1, 1, 1],
         [0, 0, 1, 2],
         [0, 1, 1, 2],
         [0, 1, -1, -1],
         [-1, -1, -1, -1],
     ])

     # scipy is given the transposed array; allel's pdist gets it as is.
     reference = scipy.spatial.distance.pdist(haplotypes.T, 'hamming')
     computed = pdist(haplotypes, 'hamming')
     aeq(reference, computed)
예제 #44
0
    def test_roh_mhmm_100pct(self):
        # Input: an all-zero (4, 2) int16 array with positions 1..100 and
        # contig_size=100.
        positions = [1, 10, 50, 100]
        genotypes = np.zeros((4, 2), dtype=np.int16)
        roh, fraction = allel.roh_mhmm(genotypes, positions, contig_size=100)

        # values correspond to start/stop/length/is_marginal
        expected_rows = np.array([[1, 100, 100, True]], dtype=object)
        aeq(roh.values, expected_rows)
        assert fraction == 1.0
예제 #45
0
 def test_take(self):
     genotypes = self.setup_instance(diploid_genotype_data)

     # take variants not in original order
     order = [2, 0]
     subset = genotypes.take(order, axis=0)

     # Two variants are selected; sample count and ploidy must be unchanged.
     eq(2, subset.n_variants)
     eq(genotypes.n_samples, subset.n_samples)
     eq(genotypes.ploidy, subset.ploidy)
     aeq(np.array(diploid_genotype_data).take(order, axis=0), subset)
예제 #46
0
    def test_mask_inaccessible(self):
        # Fixed seed so the random counts and mask are reproducible.
        np.random.seed(2837)
        for n_vars in (5, 50, 500):
            pos = np.arange(1, n_vars + 1)
            # Counts are drawn before the mask; the draw order matters for
            # reproducing the same random stream.
            flat_counts = np.random.randint(1, 40, n_vars * 2)
            ac = flat_counts.reshape((n_vars, 2))
            mask = np.random.randint(2, size=n_vars).astype(bool)

            masked_pos, masked_ac = mask_inaccessible(mask, pos, ac)
            # Results must equal plain boolean indexing with the mask.
            aeq(masked_ac, ac[mask])
            aeq(masked_pos, pos[mask])
예제 #47
0
 def test_take(self):
     source = np.rec.array(variant_table_data, dtype=variant_table_dtype)
     table = VariantTable(source)

     # take variants not in original order
     selection = [2, 0]
     result = table.take(selection)
     eq(2, result.n_variants)
     reference = source.take(selection)
     aeq(reference, result)
예제 #48
0
 def test_take(self):
     instance = self.setup_instance(diploid_genotype_data)

     # take variants not in original order
     selection = [2, 0]
     taken = instance.take(selection, axis=0)

     # Compare the dimensions first, then the data against plain numpy.
     eq(2, taken.n_variants)
     eq(instance.n_samples, taken.n_samples)
     eq(instance.ploidy, taken.ploidy)
     reference = np.array(diploid_genotype_data).take(selection, axis=0)
     aeq(reference, taken)
예제 #49
0
    def test_moving_statistic(self):
        moving = allel.moving_statistic
        values = [2, 5, 8, 16]

        # size=2 with no explicit step: expected sums are 2+5 and 8+16
        aeq([7, 24], moving(values, statistic=np.sum, size=2))

        # size=2 with step=1: expected sums are 2+5, 5+8 and 8+16
        aeq([7, 13, 24], moving(values, statistic=np.sum, size=2, step=1))
예제 #50
0
    def test_moving_statistic(self):
        moving = allel.stats.moving_statistic
        values = [2, 5, 8, 16]

        # size=2 with no explicit step: expected sums are 2+5 and 8+16
        aeq([7, 24], moving(values, statistic=np.sum, size=2))

        # size=2 with step=1: expected sums are 2+5, 5+8 and 8+16
        aeq([7, 13, 24], moving(values, statistic=np.sum, size=2, step=1))
예제 #51
0
    def test_count_alleles_subpops(self):
        # Wrap the fixture in the default chunked storage with chunklen=2.
        storage = chunked.storage_registry['default']
        genotypes = GenotypeChunkedArray(
            storage.array(diploid_genotype_data, chunklen=2))

        subpops = {'foo': [0, 2], 'bar': [1]}
        ac_subpops = genotypes.count_alleles_subpops(subpops)

        # Each sub-population's counts must equal counting alleles on just
        # that sub-population's samples (taken along axis 1).
        for label, sample_indices in subpops.items():
            counted = genotypes.take(sample_indices, axis=1).count_alleles()
            aeq(counted, ac_subpops[label])

        # Compressing with three True flags should leave three rows.
        keep = np.array([True, False, True, False, True])
        eq(3, len(ac_subpops.compress(keep)))
예제 #52
0
    def test_mean_pairwise_divergence(self):

        # simplest case, two haplotypes in each population
        haplotypes = HaplotypeArray([
            [0, 0, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 1],
            [0, 1, 1, 1],
            [1, 1, 1, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [0, 1, -1, -1],
            [-1, -1, -1, -1],
        ])

        # Columns 0-1 form the first population, columns 2-3 the second.
        ac1 = haplotypes.take([0, 1], axis=1).count_alleles()
        ac2 = haplotypes.take([2, 3], axis=1).count_alleles()

        # The last two expected values are the fill value passed below.
        expect = [0 / 4, 2 / 4, 4 / 4, 2 / 4, 0 / 4, 4 / 4, 3 / 4, -1, -1]
        aeq(expect,
            allel.mean_pairwise_difference_between(ac1, ac2, fill=-1))
예제 #53
0
    def test_to_n_ref_array_like(self):
        # see also https://github.com/cggh/scikit-allel/issues/66

        n_ref = self.setup_instance(diploid_genotype_data).to_n_ref(fill=-1)
        positive = n_ref > 0
        eq(4, np.count_nonzero(positive))
        expected_mask = np.array([[1, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 0],
                                  [0, 0, 0]], dtype='b1')
        aeq(expected_mask, positive)

        # numpy reductions trigger the issue
        row_reductions = (
            (np.sum, [2, 1, 1, 0, 0]),
            (np.min, [0, 0, 0, 0, 0]),
            (np.max, [1, 1, 1, 0, 0]),
        )
        for reduce_rows, expected in row_reductions:
            aeq(np.array(expected), reduce_rows(positive, axis=1))
예제 #54
0
 def test_pdist(self):
     # Local imports, kept inside the test as in the rest of this example.
     import scipy.spatial
     import allel.stats.distance

     rows = [[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 1],
             [0, 1, 1, 1], [1, 1, 1, 1], [0, 0, 1, 2],
             [0, 1, 1, 2], [0, 1, -1, -1], [-1, -1, -1, -1]]
     haplotypes = HaplotypeArray(rows)

     # scipy is given the transposed array; allel's pdist gets it as is.
     from_scipy = scipy.spatial.distance.pdist(haplotypes.T, 'hamming')
     from_allel = allel.stats.distance.pdist(haplotypes, 'hamming')
     aeq(from_scipy, from_allel)
예제 #55
0
    def test_constructor(self):
        # Exercises SortedIndex construction: rejected inputs first (missing
        # argument, wrong dtype, wrong dimensions, unsorted values), then
        # accepted inputs (unique, non-unique, explicitly typed, non-numeric).
        #
        # The default integer dtype is compared against the builtin ``int``
        # rather than ``np.int``: ``np.int`` was only an alias for ``int``,
        # was deprecated in NumPy 1.20 and removed in NumPy 1.24, where
        # referencing it raises AttributeError.

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            SortedIndex()

        # data has wrong dtype
        data = 'foo bar'
        with self.assertRaises(TypeError):
            SortedIndex(data)

        # data has wrong dimensions
        data = [[1, 2], [3, 4]]
        with self.assertRaises(TypeError):
            SortedIndex(data)

        # values are not sorted
        data = [2, 1, 3, 5]
        with self.assertRaises(ValueError):
            SortedIndex(data)

        # values are not sorted
        data = [4., 5., 3.7]
        with self.assertRaises(ValueError):
            SortedIndex(data)

        # valid data (unique)
        data = [1, 4, 5, 7, 12]
        idx = SortedIndex(data)
        aeq(data, idx)
        eq(int, idx.dtype)
        eq(1, idx.ndim)
        eq(5, len(idx))
        assert idx.is_unique

        # valid data (non-unique)
        data = [1, 4, 5, 5, 7, 12]
        idx = SortedIndex(data)
        aeq(data, idx)
        eq(int, idx.dtype)
        eq(1, idx.ndim)
        eq(6, len(idx))
        assert not idx.is_unique

        # valid data (typed)
        data = [1, 4, 5, 5, 7, 12]
        idx = SortedIndex(data, dtype='u4')
        aeq(data, idx)
        eq(np.uint32, idx.dtype)

        # valid data (non-numeric); the strings are in lexicographic order
        data = ['1', '12', '4', '5', '5', '7']
        idx = SortedIndex(data)
        aeq(data, idx)
예제 #56
0
    def test_constructor(self):

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            FeatureChunkedTable()

        records = np.rec.array(feature_table_data, dtype=feature_table_dtype)

        # recarray
        from_records = FeatureChunkedTable(records)
        eq(6, len(from_records))
        aeq(records, from_records)

        # dict of columns keyed by field name, with the column order given
        # explicitly through ``names``
        columns = {name: records[name] for name in feature_table_names}
        from_columns = FeatureChunkedTable(columns, names=feature_table_names)
        eq(6, len(from_columns))
        aeq(records, from_columns)
예제 #57
0
    def test_constructor(self):

        # missing data arg
        with self.assertRaises(TypeError):
            # noinspection PyArgumentList
            VariantChunkedTable()

        records = np.rec.array(variant_table_data, dtype=variant_table_dtype)

        # recarray
        from_records = VariantChunkedTable(records)
        eq(5, len(from_records))
        aeq(records, from_records)

        # dict of columns keyed by field name, with the column order given
        # explicitly through ``names``
        columns = {name: records[name] for name in variant_table_names}
        from_columns = VariantChunkedTable(columns, names=variant_table_names)
        eq(5, len(from_columns))
        aeq(records, from_columns)
예제 #58
0
    def test_from_hdf5(self):

        # setup HDF5 file
        # NOTE(review): the temporary file is created with delete=False and
        # nothing in this test removes it afterwards.
        dataset_name = 'test'
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            hdf5_path = handle.name
        with h5py.File(hdf5_path, mode='w') as writable:
            writable.create_dataset(dataset_name,
                                    data=haplotype_data,
                                    chunks=(2, 3))

        # file and node path
        loaded = HaplotypeCArray.from_hdf5(hdf5_path, dataset_name)
        aeq(haplotype_data, loaded)

        # dataset
        with h5py.File(hdf5_path, mode='r') as readable:
            loaded = HaplotypeCArray.from_hdf5(readable[dataset_name])
            aeq(haplotype_data, loaded)
예제 #59
0
    def test_mean_pairwise_divergence(self):

        # simplest case, two haplotypes in each population
        rows = [[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 1],
                [0, 1, 1, 1], [1, 1, 1, 1], [0, 0, 1, 2],
                [0, 1, 1, 2], [0, 1, -1, -1], [-1, -1, -1, -1]]
        haplotypes = HaplotypeArray(rows)

        # Columns 0-1 form the first population, columns 2-3 the second.
        first_pop = haplotypes.take([0, 1], axis=1)
        second_pop = haplotypes.take([2, 3], axis=1)
        ac1 = first_pop.count_alleles()
        ac2 = second_pop.count_alleles()

        # The last two expected values are the fill value passed below.
        expect = [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
        difference_between = allel.stats.mean_pairwise_difference_between
        aeq(expect, difference_between(ac1, ac2, fill=-1))
예제 #60
0
    def test_view(self):
        # Exercises viewing plain numpy arrays as SortedIndex: rejected
        # inputs first (wrong dtype, wrong dimensions, unsorted values), then
        # accepted inputs (unique, non-unique, explicitly typed, non-numeric).
        #
        # The default integer dtype is compared against the builtin ``int``
        # rather than ``np.int``: ``np.int`` was only an alias for ``int``,
        # was deprecated in NumPy 1.20 and removed in NumPy 1.24, where
        # referencing it raises AttributeError.

        # data has wrong dtype
        data = 'foo bar'
        with self.assertRaises(TypeError):
            np.asarray(data).view(SortedIndex)

        # data has wrong dimensions
        data = [[1, 2], [3, 4]]
        with self.assertRaises(TypeError):
            np.asarray(data).view(SortedIndex)

        # values are not sorted
        data = [2, 1, 3, 5]
        with self.assertRaises(ValueError):
            np.asarray(data).view(SortedIndex)

        # values are not sorted
        data = [4., 5., 3.7]
        with self.assertRaises(ValueError):
            np.asarray(data).view(SortedIndex)

        # valid data (unique)
        data = [1, 4, 5, 7, 12]
        idx = np.asarray(data).view(SortedIndex)
        aeq(data, idx)
        eq(int, idx.dtype)
        eq(1, idx.ndim)
        eq(5, len(idx))
        assert idx.is_unique

        # valid data (non-unique)
        data = [1, 4, 5, 5, 7, 12]
        idx = np.asarray(data).view(SortedIndex)
        aeq(data, idx)
        eq(int, idx.dtype)
        eq(1, idx.ndim)
        eq(6, len(idx))
        assert not idx.is_unique

        # valid data (typed)
        data = np.array([1, 4, 5, 5, 7, 12], dtype='u4')
        idx = np.asarray(data).view(SortedIndex)
        aeq(data, idx)
        eq(np.uint32, idx.dtype)

        # valid data (non-numeric); the strings are in lexicographic order
        data = ['1', '12', '4', '5', '5', '7']
        idx = np.asarray(data).view(SortedIndex)
        aeq(data, idx)