Beispiel #1
0
    def test_get_ligation_structure_biases(self):
        """Pin the output of ``get_ligation_structure_biases`` on chrI data.

        Loads the two sorted chrI SAM files from ``test_matrix`` into a fresh
        ``self.pairs_class()`` object built over HindIII regions, then checks
        the four returned arrays against fixed expected values.
        """
        data_dir = os.path.join(self.dir, "test_matrix")
        first_sam = os.path.join(data_dir, "yeast.sample.chrI.1_sorted.sam")
        second_sam = os.path.join(data_dir, "yeast.sample.chrI.2_sorted.sam")

        chromosome = Chromosome.from_fasta(os.path.join(data_dir, "chrI.fa"))
        genome = Genome(chromosomes=[chromosome])
        pairs = self.pairs_class()
        regions = genome.get_regions('HindIII')
        pairs.add_regions(regions.regions)
        pairs.add_read_pairs(SamBamReadPairGenerator(first_sam, second_sam))
        # The genome and region objects are no longer needed once loaded.
        genome.close()
        regions.close()

        xs, i_vals, o_vals, b_vals = pairs.get_ligation_structure_biases(
            sampling=200, skip_self_ligations=False)

        # All four result arrays must have exactly three entries.
        for values in (xs, i_vals, o_vals, b_vals):
            assert len(values) == 3

        expected_x = [470, 4489, 19259]
        expected_i = [
            2.8756218905472637, 0.8059701492537313, 0.6368159203980099
        ]
        expected_o = [
            0.2537313432835821, 0.24875621890547264, 0.46766169154228854
        ]
        expected_b = [830, 413, 423]

        assert xs.tolist() == expected_x
        assert i_vals.tolist() == expected_i
        assert o_vals.tolist() == expected_o
        assert b_vals.tolist() == expected_b
        pairs.close()
Beispiel #2
0
    def test_from_pairs(self):
        """Convert read pairs to a Hi-C object and verify the totals match.

        Checks that the Hi-C object has as many regions as the pairs object,
        that no (source, sink) edge occurs twice, and that the summed edge
        weights equal the number of pairs.
        """
        data_dir = os.path.join(self.dir, "test_matrix")
        first_sam = os.path.join(data_dir, "yeast.sample.chrI.1_sorted.sam")
        second_sam = os.path.join(data_dir, "yeast.sample.chrI.2_sorted.sam")
        chromosome = Chromosome.from_fasta(os.path.join(data_dir, "chrI.fa"))
        genome = Genome(chromosomes=[chromosome])

        pairs = ReadPairs()
        regions = genome.get_regions('HindIII')
        pairs.add_regions(regions)
        pair_generator = SamBamReadPairGenerator(first_sam, second_sam)
        pairs.add_read_pairs(pair_generator)
        genome.close()
        regions.close()

        # Remember the pair count before the pairs object is closed.
        pair_count = len(pairs)

        hic = pairs.to_hic(_hic_class=self.hic_class)

        assert len(hic.regions) == len(pairs.regions)
        pairs.close()

        total_weight = 0
        seen_edges = set()
        for edge in hic.edges():
            endpoints = (edge.source, edge.sink)
            # Each (source, sink) combination may appear only once.
            assert endpoints not in seen_edges
            seen_edges.add(endpoints)
            total_weight += edge.weight

        assert total_weight == pair_count
        hic.close()
Beispiel #3
0
 def setup_method(self, method):
     """Create ``self.genome`` from two small in-memory chromosomes."""
     # (name, length, sequence) for each chromosome, in genome order.
     chromosome_specs = (
         ('chr1', 10000, 'agcgctgctgaagcttcgatcgtaagcttc'),
         ('chr2', 5000, 'gcgctgctgaagcttcgatcgtaagcttc'),
     )
     chromosomes = [
         Chromosome(name=name, length=length, sequence=sequence)
         for name, length, sequence in chromosome_specs
     ]
     self.genome = Genome(chromosomes=chromosomes)
Beispiel #4
0
 def test_from_string(self):
     """Load ``chromosomes.fa`` via ``Genome.from_string`` and check lengths."""
     here = os.path.dirname(os.path.realpath(__file__))
     genome = Genome.from_string(here + '/test_regions/chromosomes.fa')
     # Expected len() of the chromosomes at genome indices 0..3.
     expected_lengths = (5, 3, 4, 2)
     for index, expected in enumerate(expected_lengths):
         chromosome = genome[index]
         assert len(chromosome) == expected
     genome.close()
Beispiel #5
0
    def test_knight_matrix_balancing_per_chromosome(self):
        """Run ``kr_balancing`` with ``whole_matrix=False`` and check sums.

        The matrix must be symmetric before and after balancing, and every
        unmasked entry of ``sum(matrix)`` must be within 1e-5 of 1.0 or be
        exactly 0.
        """
        chromosome = Chromosome.from_fasta(self.dir + "/test_matrix/chrI.fa")
        genome = Genome(chromosomes=[chromosome])

        hic = self.hic_class()
        regions = genome.get_regions(10000)
        genome.close()
        hic.add_regions(regions)
        regions.close()
        hic.load_from_hic(self.hic_cerevisiae)

        raw_matrix = hic[:, :]
        assert is_symmetric(raw_matrix)

        kr_balancing(hic, whole_matrix=False)
        balanced_matrix = hic[:, :]
        assert is_symmetric(balanced_matrix)

        marginals = sum(balanced_matrix)
        for marginal in marginals:
            # Masked entries are skipped.
            if not np.ma.is_masked(marginal):
                assert abs(1.0 - marginal) < 1e-5 or marginal == 0
        hic.close()
Beispiel #6
0
 def setup_method(self, method):
     """Prepare ``self.pairs`` from the lambda test reads.

     Sets ``self.dir``, ``self.pairs``, ``self.genome`` and
     ``self.pairs_class`` for use by the tests.
     """
     self.dir = os.path.dirname(os.path.realpath(__file__))
     data_dir = os.path.join(self.dir, "test_pairs")
     first_sam = os.path.join(data_dir, "lambda_reads1_sort.sam")
     second_sam = os.path.join(data_dir, "lambda_reads2_sort.sam")
     read_pair_source = SamBamReadPairGenerator(first_sam, second_sam)
     self.pairs = ReadPairs()
     self.genome = Genome.from_folder(
         os.path.join(data_dir, "lambda_genome"))
     # Regions are added first; the read pairs are loaded afterwards.
     binned_regions = self.genome.get_regions(1000)
     self.pairs.add_regions(binned_regions.regions)
     binned_regions.close()
     self.pairs.add_read_pairs(read_pair_source)
     self.pairs_class = ReadPairs
Beispiel #7
0
    def test_ice_matrix_balancing(self):
        """Run ``ice_balancing`` and check that the sums agree.

        The matrix must be symmetric before and after balancing, and every
        unmasked entry of ``sum(matrix)`` must lie strictly within 5 of the
        first entry or be exactly 0.
        """
        chromosome = Chromosome.from_fasta(self.dir + "/test_matrix/chrI.fa")
        genome = Genome(chromosomes=[chromosome])

        hic = self.hic_class()
        regions = genome.get_regions(10000)
        genome.close()
        hic.add_regions(regions)
        regions.close()
        hic.load_from_hic(self.hic_cerevisiae)

        raw_matrix = hic[:, :]
        assert is_symmetric(raw_matrix)

        ice_balancing(hic)
        balanced_matrix = hic[:, :]
        assert is_symmetric(balanced_matrix)

        marginals = sum(balanced_matrix)
        for marginal in marginals:
            # Masked entries are skipped.
            if not np.ma.is_masked(marginal):
                assert (marginals[0] - 5 < marginal < marginals[0] + 5) \
                    or marginal == 0
        hic.close()
Beispiel #8
0
class TestGenome:
    """Tests for iterating a ``Genome`` and deriving regions from it."""

    def setup_method(self, method):
        """Create ``self.genome`` from two small in-memory chromosomes."""
        chr1 = Chromosome(name='chr1',
                          length=10000,
                          sequence='agcgctgctgaagcttcgatcgtaagcttc')
        chr2 = Chromosome(name='chr2',
                          length=5000,
                          sequence='gcgctgctgaagcttcgatcgtaagcttc')
        self.genome = Genome(chromosomes=[chr1, chr2])

    def teardown_method(self, method):
        """Close the genome created in ``setup_method``."""
        self.genome.close()

    def test_iter(self):
        """Iterating the genome yields chr1 then chr2 with their attributes."""
        expected = [
            ('chr1', 10000, 'agcgctgctgaagcttcgatcgtaagcttc'),
            ('chr2', 5000, 'gcgctgctgaagcttcgatcgtaagcttc'),
        ]
        seen = 0
        for chromosome in self.genome:
            if seen < len(expected):
                name, length, sequence = expected[seen]
                assert chromosome.name == name
                assert chromosome.length == length
                assert chromosome.sequence == sequence
            seen += 1
        # Without this check the test would pass if iteration yielded
        # nothing at all.
        assert seen == len(expected)

    def test_node_list(self):
        """Check first and last regions for HindIII and 4000 bp splits."""
        regions = self.genome.get_regions('HindIII')
        try:
            assert len(regions) == 6
            # Index access is kept for every position so that integer
            # indexing is exercised across the whole region list.
            for i in range(len(regions)):
                region = regions[i]
                if i == 0:
                    assert region.chromosome == 'chr1'
                    assert region.start == 1
                    assert region.end == 12
                if i == 5:
                    assert region.chromosome == 'chr2'
                    assert region.start == 25
                    assert region.end == 5000
        finally:
            # Close the handle even when an assertion fails.
            regions.close()

        nl = self.genome.get_regions(4000)
        try:
            assert len(nl) == 5
            last_index = len(nl) - 1
            for i in range(len(nl)):
                node = nl[i]
                if i == 0:
                    assert node.chromosome == 'chr1'
                    assert node.start == 1
                    assert node.end == 4000
                # The original compared against index 5, which does not
                # exist in a five-element list, so this check never ran.
                if i == last_index:
                    assert node.chromosome == 'chr2'
                    assert node.start == 4001
                    assert node.end == 5000
        finally:
            nl.close()

    def test_from_string(self):
        """Load ``chromosomes.fa`` via ``Genome.from_string``; check lengths."""
        base_dir = os.path.dirname(os.path.realpath(__file__))
        genome = Genome.from_string(base_dir + '/test_regions/chromosomes.fa')
        try:
            # Expected len() of the chromosomes at genome indices 0..3.
            for index, expected_length in enumerate((5, 3, 4, 2)):
                assert len(genome[index]) == expected_length
        finally:
            # Close the genome even when an assertion fails.
            genome.close()