Exemple #1
0
    def test_remove_contained_in_list(self):
        '''remove_contained_in_list should leave only the expected intervals in the list'''
        # Endpoints of the intervals handed to the function, in input order.
        # Note the repeated (30, 35) entry and the several intervals sharing a start.
        endpoints_in = [
            (1, 2), (4, 4), (4, 5), (5, 6),
            (7, 9), (8, 10), (9, 11),
            (20, 25), (20, 24), (20, 26),
            (30, 38), (30, 37), (30, 36), (30, 35), (30, 35), (32, 33),
            (38, 50),
            (65, 70), (67, 70),
        ]

        # Endpoints of the intervals that must remain afterwards, in order.
        endpoints_expected = [
            (1, 2), (4, 5), (5, 6),
            (7, 9), (8, 10), (9, 11),
            (20, 26),
            (30, 38),
            (38, 50),
            (65, 70),
        ]

        a = [intervals.Interval(start, end) for start, end in endpoints_in]
        b = [intervals.Interval(start, end) for start, end in endpoints_expected]

        # The return value is ignored: the test checks the list `a` itself,
        # so the function is expected to modify it in place.
        intervals.remove_contained_in_list(a)
        self.assertSequenceEqual(a, b)
    def test_remove_contained_in_list(self):
        '''Check that remove_contained_in_list keeps exactly the expected intervals'''
        make = intervals.Interval

        # Input list, written as (start, end) pairs; (30, 35) appears twice on purpose.
        a = [make(*pair) for pair in (
            (1, 2),
            (4, 4),
            (4, 5),
            (5, 6),
            (7, 9),
            (8, 10),
            (9, 11),
            (20, 25),
            (20, 24),
            (20, 26),
            (30, 38),
            (30, 37),
            (30, 36),
            (30, 35),
            (30, 35),
            (32, 33),
            (38, 50),
            (65, 70),
            (67, 70),
        )]

        # What the list is expected to hold once the call has finished.
        b = [make(*pair) for pair in (
            (1, 2),
            (4, 5),
            (5, 6),
            (7, 9),
            (8, 10),
            (9, 11),
            (20, 26),
            (30, 38),
            (38, 50),
            (65, 70),
        )]

        # Called for its effect on `a`; nothing it returns is used here.
        intervals.remove_contained_in_list(a)
        self.assertSequenceEqual(a, b)
# Merge all the overlapping hits in each list of hits corresponding to one
# contig, then keep every merged hit that is long enough as a contig to print
# for its reference sequence.
for ref_name, hits_by_query in nucmer_hits.items():
    for hits in hits_by_query.values():
        intervals.merge_overlapping_in_list(hits)

        for hit in hits:
            # The length counts both end points of the hit, hence the + 1.
            if hit.end - hit.start + 1 >= options.min_seq_length:
                # Store a (shallow) copy so the kept object is distinct from
                # the one still held in nucmer_hits.
                contigs_to_print.setdefault(ref_name, []).append(copy.copy(hit))

# Remove any contigs that are completely contained in another contig.
for contig_list in contigs_to_print.values():
    intervals.remove_contained_in_list(contig_list)

# Print the final perfect contigs. Records are named
# <ref_name>:<counter>:<start>:<end>, where the counter restarts at 1 for
# every reference and goes up by one per contig written.
f_out = utils.open_file_write(options.outprefix + '.fa')
try:
    for ref_name in sorted(contigs_to_print):
        for counter, interval in enumerate(contigs_to_print[ref_name], start=1):
            seq_id = ':'.join(
                str(x) for x in (ref_name, counter, interval.start, interval.end)
            )
            # NOTE(review): the [start - 1:end] slice presumably converts
            # 1-based inclusive coordinates to Python slicing - confirm.
            print(sequences.Fasta(
                seq_id, ref_seqs[ref_name][interval.start - 1:interval.end]),
                  file=f_out)
finally:
    # Always release the output handle, even if writing a record fails.
    utils.close(f_out)
# Merge all the overlapping hits in each list of hits corresponding to one
# contig, then keep every merged hit that is long enough as a contig to print
# for its reference sequence.
for ref_name, hits_by_query in nucmer_hits.items():
    for hits in hits_by_query.values():
        intervals.merge_overlapping_in_list(hits)

        for hit in hits:
            # The length counts both end points of the hit, hence the + 1.
            if hit.end - hit.start + 1 >= options.min_seq_length:
                # Store a (shallow) copy so the kept object is distinct from
                # the one still held in nucmer_hits.
                contigs_to_print.setdefault(ref_name, []).append(copy.copy(hit))

# Remove any contigs that are completely contained in another contig.
for contig_list in contigs_to_print.values():
    intervals.remove_contained_in_list(contig_list)

# Print the final perfect contigs. Records are named
# <ref_name>:<counter>:<start>:<end>, where the counter restarts at 1 for
# every reference and goes up by one per contig written.
f_out = utils.open_file_write(options.outprefix + ".fa")
try:
    for ref_name in sorted(contigs_to_print):
        for counter, interval in enumerate(contigs_to_print[ref_name], start=1):
            seq_id = ":".join(str(x) for x in (ref_name, counter, interval.start, interval.end))
            # NOTE(review): the [start - 1 : end] slice presumably converts
            # 1-based inclusive coordinates to Python slicing - confirm.
            print(sequences.Fasta(seq_id, ref_seqs[ref_name][interval.start - 1 : interval.end]), file=f_out)
finally:
    # Always release the output handle, even if writing a record fails.
    utils.close(f_out)