Exemple #1
0
 def test_count(self):
     # kmer.count on a single FASTA handle (size argument 8) should write a
     # profile that matches utils.counts for the same sequences.
     expected = utils.counts(utils.SEQUENCES, 8)
     target = self.empty()
     with open(self.fasta(utils.SEQUENCES)) as fasta_in, \
             utils.open_profile(target, 'w') as profile_out:
         kmer.count([fasta_in], profile_out, 8)
     utils.test_profile_file(target, expected, 8)
Exemple #2
0
    def test_smooth(self):
        # See test_kdistlib.test_ProfileDistance_dynamic_smooth
        #
        # Runs kmer.smooth with summary='min' on two profiles (size argument
        # 2) and compares both output files with hard-coded expected counts.
        input_left = Counter(['AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT',
                              'GA', 'GC', 'GG', 'GT', 'TA', 'TG', 'TT'])
        input_right = Counter(['AC', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA',
                               'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT'])
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(input_left, 2)) as handle_left, \
                utils.open_profile(self.profile(input_right, 2)) as handle_right, \
                utils.open_profile(filename_left, 'w') as out_left, \
                utils.open_profile(filename_right, 'w') as out_right:
            kmer.smooth(handle_left, handle_right, out_left, out_right,
                        summary='min')

        # Expected contents of the two output profiles.
        expected_left = Counter(['AA', 'AA', 'AA', 'CA', 'CC', 'CG', 'CT',
                                 'GA', 'GC', 'GG', 'GT', 'TA', 'TA', 'TA'])
        expected_right = Counter(['AA', 'AA', 'CA', 'CC', 'CG', 'CT', 'GA',
                                  'GC', 'GG', 'GT', 'TA', 'TA', 'TA', 'TA'])

        utils.test_profile_file(filename_left, expected_left, 2)
        utils.test_profile_file(filename_right, expected_right, 2)
Exemple #3
0
    def test_scale(self):
        # Runs kmer.scale on two profiles. The expected counts multiply the
        # profile with the smaller total by the ratio of the two totals and
        # leave the other profile at factor 1.0.
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename_left, 'w') as out_left, \
                utils.open_profile(filename_right, 'w') as out_right:
            kmer.scale(handle_left, handle_right, out_left, out_right)

        total_left = sum(counts_left.values())
        total_right = sum(counts_right.values())
        if total_left < total_right:
            scale_left, scale_right = total_right / total_left, 1.0
        else:
            scale_left, scale_right = 1.0, total_left / total_right

        # Rescale the reference counts in place (keys are unchanged).
        for word in counts_left:
            counts_left[word] *= scale_left
        for word in counts_right:
            counts_right[word] *= scale_right

        utils.test_profile_file(filename_left, counts_left, 8)
        utils.test_profile_file(filename_right, counts_right, 8)
Exemple #4
0
    def test_scale(self):
        # Checks kmer.scale: whichever input profile has the smaller total
        # count is expected to be multiplied by (larger total / smaller
        # total); the other profile keeps a factor of 1.0.
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename_left, 'w') as out_left, \
                utils.open_profile(filename_right, 'w') as out_right:
            kmer.scale(handle_left, handle_right, out_left, out_right)

        sum_left = sum(counts_left.values())
        sum_right = sum(counts_right.values())
        scale_left = 1.0
        scale_right = 1.0
        if sum_left < sum_right:
            scale_left = sum_right / sum_left
        else:
            scale_right = sum_left / sum_right

        # Apply the factors to the reference counts in place.
        for key in counts_left:
            counts_left[key] *= scale_left
        for key in counts_right:
            counts_right[key] *= scale_right

        utils.test_profile_file(filename_left, counts_left, 8)
        utils.test_profile_file(filename_right, counts_right, 8)
Exemple #5
0
 def test_count(self):
     # Feed one FASTA handle to kmer.count (size argument 8) and compare the
     # written profile with the reference from utils.counts.
     reference = utils.counts(utils.SEQUENCES, 8)
     out_path = self.empty()
     with open(self.fasta(utils.SEQUENCES)) as source, \
             utils.open_profile(out_path, 'w') as sink:
         kmer.count([source], sink, 8)
     utils.test_profile_file(out_path, reference, 8)
Exemple #6
0
 def test_convert(self):
     # kmer.convert reads a file produced by self.profile_old_format and
     # writes a profile; the result must match the original counts.
     expected = utils.counts(utils.SEQUENCES, 8)
     target = self.empty()
     with open(self.profile_old_format(expected, 8)) as old_in, \
             utils.open_profile(target, 'w') as profile_out:
         kmer.convert([old_in], profile_out)
     utils.test_profile_file(target, expected, 8)
Exemple #7
0
 def test_convert(self):
     # Convert a file written by self.profile_old_format with kmer.convert
     # and verify the new profile against the counts it was built from.
     reference = utils.counts(utils.SEQUENCES, 8)
     out_path = self.empty()
     with open(self.profile_old_format(reference, 8)) as source, \
             utils.open_profile(out_path, 'w') as sink:
         kmer.convert([source], sink)
     utils.test_profile_file(out_path, reference, 8)
Exemple #8
0
    def test_balance(self):
        # kmer.balance output is expected to equal the input counts with the
        # count of every reverse complement added on top.
        counts = utils.counts(utils.SEQUENCES, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts, 8)) as input_handle, \
                utils.open_profile(filename, 'w') as output_handle:
            kmer.balance(input_handle, output_handle)

        # Build the mapping first, then add it; Counter.update sums counts.
        mirrored = {utils.reverse_complement(word): number
                    for word, number in counts.items()}
        counts.update(mirrored)
        utils.test_profile_file(filename, counts, 8)
Exemple #9
0
    def test_balance(self):
        # Run kmer.balance and compare with the reference counts after adding
        # each entry's count to its utils.reverse_complement key.
        counts = utils.counts(utils.SEQUENCES, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts, 8)) as source, \
                utils.open_profile(filename, 'w') as sink:
            kmer.balance(source, sink)

        # The complement mapping is fully built before the Counter is changed.
        complement_counts = dict(
            (utils.reverse_complement(key), value)
            for key, value in counts.items())
        counts.update(complement_counts)
        utils.test_profile_file(filename, counts, 8)
Exemple #10
0
 def test_count_by_record(self):
     # With by_record=True, kmer.count should store one profile per FASTA
     # record; each is looked up by the record's name ('0', '1', ...).
     counts_by_record = [utils.counts(record, 8) for record in utils.SEQUENCES]
     names = [str(index) for index in range(len(counts_by_record))]
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES, names=names)) as fasta_in, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.count([fasta_in], profile_out, 8, by_record=True)
     for record_name, record_counts in zip(names, counts_by_record):
         utils.test_profile_file(filename, record_counts, 8, name=record_name)
Exemple #11
0
    def test_profile_save(self):
        # A klib.Profile built from utils.as_array is saved to a fresh file,
        # which must then match the counts it was created from.
        counts = utils.counts(utils.SEQUENCES, 4)
        values = utils.as_array(counts, 4)
        profile = klib.Profile(values)

        filename = self.empty()
        with utils.open_profile(filename, 'w') as sink:
            profile.save(sink)

        utils.test_profile_file(filename, counts, 4)
Exemple #12
0
    def test_profile_save(self):
        # Save a klib.Profile (constructed from utils.as_array of the
        # reference counts) and check the file written by Profile.save.
        reference = utils.counts(utils.SEQUENCES, 4)
        profile = klib.Profile(utils.as_array(reference, 4))

        out_path = self.empty()
        with utils.open_profile(out_path, 'w') as out_handle:
            profile.save(out_handle)

        utils.test_profile_file(out_path, reference, 4)
Exemple #13
0
    def test_merge(self):
        # kmer.merge without a custom merger is expected to produce the sum
        # of the two input Counters.
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(handle_left, handle_right, merged_out)

        expected = counts_left + counts_right
        utils.test_profile_file(filename, expected, 8)
Exemple #14
0
    def test_merge(self):
        # Merge two profiles with kmer.merge (default merger) and compare the
        # output with counts_left + counts_right.
        left = utils.counts(utils.SEQUENCES_LEFT, 8)
        right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        out_path = self.empty()

        with utils.open_profile(self.profile(left, 8)) as left_in, \
                utils.open_profile(self.profile(right, 8)) as right_in, \
                utils.open_profile(out_path, 'w') as sink:
            kmer.merge(left_in, right_in, sink)

        combined = left + right
        utils.test_profile_file(out_path, combined, 8)
Exemple #15
0
 def test_count_multi(self):
     # kmer.count on two FASTA handles with names=['a', 'b'] should write
     # two profiles, retrievable under the names 'a' and 'b'.
     expected_a = utils.counts(utils.SEQUENCES_LEFT, 8)
     expected_b = utils.counts(utils.SEQUENCES_RIGHT, 8)
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES_LEFT)) as fasta_a, \
             open(self.fasta(utils.SEQUENCES_RIGHT)) as fasta_b, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.count([fasta_a, fasta_b], profile_out, 8, names=['a', 'b'])
     utils.test_profile_file(filename, expected_a, 8, name='a')
     utils.test_profile_file(filename, expected_b, 8, name='b')
Exemple #16
0
    def test_cat_prefixes(self):
        # Two input profiles both named 'X' are concatenated with
        # prefixes=['a_', 'b_']; the output is checked under 'a_X' and 'b_X'.
        counts_a = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_b = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_a, 8, name='X')) as handle_a, \
                utils.open_profile(self.profile(counts_b, 8, name='X')) as handle_b, \
                utils.open_profile(filename, 'w') as combined_out:
            kmer.cat([handle_a, handle_b], combined_out, prefixes=['a_', 'b_'])

        for expected, label in ((counts_a, 'a_X'), (counts_b, 'b_X')):
            utils.test_profile_file(filename, expected, 8, name=label)
Exemple #17
0
    def test_cat_prefixes(self):
        # kmer.cat with prefixes=['a_', 'b_'] on two profiles that share the
        # name 'X'; the results are expected under 'a_X' and 'b_X'.
        first = utils.counts(utils.SEQUENCES_LEFT, 8)
        second = utils.counts(utils.SEQUENCES_RIGHT, 8)
        out_path = self.empty()

        with utils.open_profile(self.profile(first, 8, name='X')) as first_in, \
                utils.open_profile(self.profile(second, 8, name='X')) as second_in, \
                utils.open_profile(out_path, 'w') as sink:
            kmer.cat([first_in, second_in], sink, prefixes=['a_', 'b_'])

        utils.test_profile_file(out_path, first, 8, name='a_X')
        utils.test_profile_file(out_path, second, 8, name='b_X')
Exemple #18
0
    def test_shrink(self):
        # kmer.shrink with factor 1: the expected profile (checked with size
        # 7) sums, per key-minus-last-character, the counts of every original
        # key starting with that prefix.
        counts = utils.counts(utils.SEQUENCES, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts, 8)) as input_handle, \
                utils.open_profile(filename, 'w') as output_handle:
            kmer.shrink(input_handle, output_handle, 1)

        prefixes = set(word[:-1] for word in counts)
        shrunk = Counter({
            prefix: sum(counts[word] for word in counts
                        if word.startswith(prefix))
            for prefix in prefixes
        })
        utils.test_profile_file(filename, shrunk, 7)
Exemple #19
0
    def test_shrink(self):
        # Shrink a profile by 1 with kmer.shrink and compare against counts
        # regrouped by prefix (each key with its last character dropped).
        original = utils.counts(utils.SEQUENCES, 8)
        out_path = self.empty()

        with utils.open_profile(self.profile(original, 8)) as source, \
                utils.open_profile(out_path, 'w') as sink:
            kmer.shrink(source, sink, 1)

        grouped = {}
        for prefix in set(key[:-1] for key in original):
            # Total of all original keys that begin with this prefix.
            grouped[prefix] = sum(original[key] for key in original
                                  if key.startswith(prefix))
        expected = Counter(grouped)
        utils.test_profile_file(out_path, expected, 7)
Exemple #20
0
    def test_shuffle(self):
        # See test_klib.profile_shuffle
        #
        # numpy's global RNG is seeded with 100 immediately before
        # kmer.shuffle; the output is compared with hard-coded counts
        # (presumably the values that seed produces).
        counts = utils.counts(utils.SEQUENCES, 2)
        filename = self.empty()

        with utils.open_profile(self.profile(counts, 2)) as input_handle, \
                utils.open_profile(filename, 'w') as output_handle:
            np.random.seed(100)
            kmer.shuffle(input_handle, output_handle)

        # All 16 two-letter words over 'ACGT', in itertools.product order.
        words = [''.join(letters)
                 for letters in itertools.product('ACGT', repeat=2)]
        values = [13, 7, 6, 18, 12, 1, 13, 17, 16, 12, 23, 27, 24, 17, 18, 12]
        expected = dict(zip(words, values))
        utils.test_profile_file(filename, expected, 2)
Exemple #21
0
    def test_shuffle(self):
        # See test_klib.profile_shuffle
        #
        # Seeds np.random with 100, runs kmer.shuffle, and checks the written
        # profile against a fixed table of expected counts.
        original = utils.counts(utils.SEQUENCES, 2)
        out_path = self.empty()

        with utils.open_profile(self.profile(original, 2)) as source, \
                utils.open_profile(out_path, 'w') as sink:
            np.random.seed(100)
            kmer.shuffle(source, sink)

        expected_values = [13, 7, 6, 18,
                           12, 1, 13, 17,
                           16, 12, 23, 27,
                           24, 17, 18, 12]
        # Keys follow itertools.product('ACGT', repeat=2) ordering.
        keys = [''.join(pair) for pair in itertools.product('ACGT', repeat=2)]
        expected = dict(zip(keys, expected_values))
        utils.test_profile_file(out_path, expected, 2)
Exemple #22
0
 def test_count_multi_by_record(self):
     # Two FASTA handles, names=['a', 'b'] and by_record=True: every record
     # is expected under '<file name>_<record name>', e.g. 'a_0' or 'b_1'.
     counts_by_record_left = [utils.counts(record, 8)
                              for record in utils.SEQUENCES_LEFT]
     counts_by_record_right = [utils.counts(record, 8)
                               for record in utils.SEQUENCES_RIGHT]
     names_left = [str(index) for index in range(len(counts_by_record_left))]
     names_right = [str(index) for index in range(len(counts_by_record_right))]
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES_LEFT, names=names_left)) as handle_left, \
             open(self.fasta(utils.SEQUENCES_RIGHT, names=names_right)) as handle_right, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.count([handle_left, handle_right], profile_out, 8,
                    names=['a', 'b'], by_record=True)
     for record_name, record_counts in zip(names_left, counts_by_record_left):
         utils.test_profile_file(filename, record_counts, 8,
                                 name='a_' + record_name)
     for record_name, record_counts in zip(names_right, counts_by_record_right):
         utils.test_profile_file(filename, record_counts, 8,
                                 name='b_' + record_name)
Exemple #23
0
    def test_merge_custom_name(self):
        # kmer.merge with custom_merger='numpy.multiply': the expected counts
        # are the products for the keys present in both inputs.
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(handle_left, handle_right, merged_out,
                       custom_merger='numpy.multiply')

        shared = set(counts_left) & set(counts_right)
        counts_mult = Counter({word: counts_left[word] * counts_right[word]
                               for word in shared})

        utils.test_profile_file(filename, counts_mult, 8)
Exemple #24
0
    def test_merge_custom_name(self):
        # Merge with a merger given by name ('numpy.multiply') and compare
        # with per-key products over the keys the two inputs have in common.
        left = utils.counts(utils.SEQUENCES_LEFT, 8)
        right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        out_path = self.empty()

        with utils.open_profile(self.profile(left, 8)) as left_in, \
                utils.open_profile(self.profile(right, 8)) as right_in, \
                utils.open_profile(out_path, 'w') as sink:
            kmer.merge(left_in, right_in, sink, custom_merger='numpy.multiply')

        products = {}
        for key in set(left) & set(right):
            products[key] = left[key] * right[key]
        expected = Counter(products)

        utils.test_profile_file(out_path, expected, 8)
Exemple #25
0
    def test_merge_custom_expr(self):
        # kmer.merge with a merger given as an expression string; the
        # expected result is the summed counts with every key that occurs in
        # both inputs removed.
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        merger = '(left + right) * np.logical_xor(left, right)'
        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(handle_left, handle_right, merged_out,
                       custom_merger=merger)

        counts_xor = counts_left + counts_right
        shared = set(counts_left) & set(counts_right)
        for word in shared:
            del counts_xor[word]

        utils.test_profile_file(filename, counts_xor, 8)
Exemple #26
0
    def test_merge_custom_expr(self):
        # Merge using an expression-string merger and compare against the sum
        # of both Counters minus all keys the two inputs share.
        left = utils.counts(utils.SEQUENCES_LEFT, 8)
        right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        out_path = self.empty()

        with utils.open_profile(self.profile(left, 8)) as left_in, \
                utils.open_profile(self.profile(right, 8)) as right_in, \
                utils.open_profile(out_path, 'w') as sink:
            kmer.merge(
                left_in, right_in, sink,
                custom_merger='(left + right) * np.logical_xor(left, right)')

        expected = left + right
        for key in set(left) & set(right):
            # Keys found on both sides are not expected in the output.
            del expected[key]

        utils.test_profile_file(out_path, expected, 8)
Exemple #27
0
    def test_positive(self):
        # kmer.positive writes two profiles; each is expected to hold only
        # the entries of its input whose key also occurs in the other input.
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename_left, 'w') as out_left, \
                utils.open_profile(filename_right, 'w') as out_right:
            kmer.positive(handle_left, handle_right, out_left, out_right)

        expected_left = Counter(word for word in counts_left.elements()
                                if word in counts_right)
        utils.test_profile_file(filename_left, expected_left, 8)

        expected_right = Counter(word for word in counts_right.elements()
                                 if word in counts_left)
        utils.test_profile_file(filename_right, expected_right, 8)
Exemple #28
0
    def test_positive(self):
        # Run kmer.positive on two profiles and check that each output keeps
        # just the elements whose key is also a key of the other input.
        left = utils.counts(utils.SEQUENCES_LEFT, 8)
        right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        path_left = self.empty()
        path_right = self.empty()

        with utils.open_profile(self.profile(left, 8)) as left_in, \
                utils.open_profile(self.profile(right, 8)) as right_in, \
                utils.open_profile(path_left, 'w') as left_out, \
                utils.open_profile(path_right, 'w') as right_out:
            kmer.positive(left_in, right_in, left_out, right_out)

        kept_left = Counter(key for key in left.elements() if key in right)
        utils.test_profile_file(path_left, kept_left, 8)
        kept_right = Counter(key for key in right.elements() if key in left)
        utils.test_profile_file(path_right, kept_right, 8)
Exemple #29
0
    def test_smooth(self):
        # See test_kdistlib.test_ProfileDistance_dynamic_smooth
        #
        # kmer.smooth is called with summary='min' on two small profiles
        # (size argument 2); both outputs are checked against fixed counts.
        before_left = Counter([
            'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT',
            'GA', 'GC', 'GG', 'GT', 'TA', 'TG', 'TT',
        ])
        before_right = Counter([
            'AC', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA',
            'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT',
        ])
        path_left = self.empty()
        path_right = self.empty()

        with utils.open_profile(self.profile(before_left, 2)) as left_in, \
                utils.open_profile(self.profile(before_right, 2)) as right_in, \
                utils.open_profile(path_left, 'w') as left_out, \
                utils.open_profile(path_right, 'w') as right_out:
            kmer.smooth(left_in, right_in, left_out, right_out, summary='min')

        # Counts expected in the two output files.
        after_left = Counter([
            'AA', 'AA', 'AA', 'CA', 'CC', 'CG', 'CT',
            'GA', 'GC', 'GG', 'GT', 'TA', 'TA', 'TA',
        ])
        after_right = Counter([
            'AA', 'AA', 'CA', 'CC', 'CG', 'CT', 'GA',
            'GC', 'GG', 'GT', 'TA', 'TA', 'TA', 'TA',
        ])

        utils.test_profile_file(path_left, after_left, 2)
        utils.test_profile_file(path_right, after_right, 2)