コード例 #1
0
def make_hits_union(ids, d1, d2):
    '''Combine the entries of d1 and d2 for every key listed in ids.

    Returns a new dict with one key per element of ids. Each value is a
    fresh list holding that key's entries from d1 followed by those from
    d2 (a key found in neither gets an empty list), after the list has been
    passed to genome_intervals.merge_overlapping_in_list().
    '''
    union = {}

    for name in ids:
        combined = []
        union[name] = combined
        # d1 entries go in before d2 entries.
        for source in (d1, d2):
            if name in source:
                combined.extend(source[name])

        # The call's return value is not used; the list object is handed
        # over so it can be updated in place.
        genome_intervals.merge_overlapping_in_list(combined)

    return union
コード例 #2
0
    def test_merge_overlapping_in_list(self):
        '''merge_overlapping_in_list() merges correctly'''
        # Unsorted input that includes a duplicate (20-30 twice), intervals
        # sharing an end point (10-20 / 20-30) and a neighbouring pair
        # (29-50 / 51-60) that the expected result also joins together.
        input_coords = [
            (1, 2),
            (51, 60),
            (10, 20),
            (20, 30),
            (20, 30),
            (29, 50),
            (65, 70),
        ]
        expected_coords = [(1, 2), (10, 60), (65, 70)]

        intervals = [genome_intervals.Interval(s, e) for s, e in input_coords]
        expected = [genome_intervals.Interval(s, e) for s, e in expected_coords]

        # The list is modified in place, so compare it directly afterwards.
        genome_intervals.merge_overlapping_in_list(intervals)
        self.assertSequenceEqual(intervals, expected)
コード例 #3
0
def get_nucmer_hits(coords_file):
    '''Gather the reference and query hit intervals from a nucmer coords file.

    Returns a tuple (ref_hits, qry_hits). Each element is a dict keyed by
    sequence name whose value is a list of genome_intervals.Interval
    objects in 0-based coordinates; every list has been passed to
    genome_intervals.merge_overlapping_in_list().
    '''
    ref_hits = {}
    qry_hits = {}

    for hit in nucmer.file_reader(coords_file):
        # nucmer hits are 1-based. Inside the script, use 0-based.
        # Sorting puts the smaller coordinate first regardless of the order
        # in which the hit reports its two ends.
        ref_lo, ref_hi = sorted([hit.ref_start - 1, hit.ref_end - 1])
        ref_hits.setdefault(hit.ref_name, []).append(
            genome_intervals.Interval(ref_lo, ref_hi)
        )

        qry_lo, qry_hi = sorted([hit.qry_start - 1, hit.qry_end - 1])
        qry_hits.setdefault(hit.qry_name, []).append(
            genome_intervals.Interval(qry_lo, qry_hi)
        )

    # Merge each per-sequence list in place: reference lists first, then
    # query lists.
    for hits_by_name in (ref_hits, qry_hits):
        for intervals in hits_by_name.values():
            genome_intervals.merge_overlapping_in_list(intervals)

    return ref_hits, qry_hits
コード例 #4
0

def file2regions(fname):
    '''Read regions from a file into a dict of interval lists.

    Lines starting with '#' are skipped. Every other line must split on
    whitespace into exactly three fields: name, start and end.

    Returns a dict mapping each name to a list of genome_intervals.Interval
    objects, in the order the lines appear in the file.

    Raises ValueError if a line does not split into exactly three fields.
    The file handle is closed before the exception propagates.
    '''
    regions = {}

    f = utils.open_file_read(fname)

    # try/finally so the handle is released even when a malformed line
    # raises part-way through the file.
    try:
        for line in f:
            if line.startswith('#'):
                continue

            # start and end are handed on exactly as read (strings); any
            # conversion is left to genome_intervals.Interval.
            (name, start, end) = line.rstrip().split()
            if name not in regions:
                regions[name] = []

            regions[name].append(genome_intervals.Interval(start, end))
    finally:
        utils.close(f)

    return regions


# Load the regions, merge each name's intervals and write the result as
# tab-separated lines: name, start, end.
regions = file2regions(options.infile)
f = utils.open_file_write(options.outfile)

# Output is ordered by region name.
for name in sorted(regions):
    intervals = regions[name]
    # The list is merged in place before it is written out.
    genome_intervals.merge_overlapping_in_list(intervals)
    for interval in intervals:
        print(name, interval.start, interval.end, sep='\t', file=f)

utils.close(f)