Beispiel #1
0
    def test_cache_index(self):
        """
        Walk over a track after the index has been made but has been removed
        from the filesystem.

        The track is first walked with `force_index=True`, then the indices
        are removed with `keep_cache=True` and the track is walked again.
        Both walks must yield exactly the expected items, no more, no fewer.
        """
        values = [(2, 392.0), (3, 408.0), (4, 420.0), (5, 452.0),
                  (7, 466.0), (8, 474.0), (9, 479.0)]
        b = [(r, p, v) for r in ('1', '13', 'MT') for (p, v) in values]

        walker = wiggelen.walk(open_('b.wig'), force_index=True)
        # Compare whole sequences: zipping the expectation with the walker
        # stops at the shorter of the two, so a walk that ends too early
        # would go unnoticed.
        assert_equal(b, list(walker))
        assert_raises(StopIteration, next, walker)

        remove_indices(keep_cache=True)

        walker = wiggelen.walk(open_('b.wig'))
        assert_equal(b, list(walker))
        assert_raises(StopIteration, next, walker)
Beispiel #2
0
 def test_walk_complex(self):
     """
     Walk over a complex track.

     Smoke test: the track is consumed to the end and no values are
     compared, so it only fails if walking raises.
     """
     track = open_('complex.wig')
     for _item in wiggelen.walk(track):
         continue
Beispiel #3
0
    def test_walk_fixed_step_without_step(self):
        """
        Walk over a fixed step wiggle track without `step` arguments.

        According to the spec, `fixedStep` definitions require the `step`
        argument. However, there seems to be real-world data where it is
        missing and the UCSC Genome Browser can still work with it.

        So we also support it.

        Issue: https://github.com/martijnvermaat/wiggelen/issues/1
        """
        c = [
            ('chr', 1, 64.), ('chr', 2, 64.), ('chr', 3, 65.), ('chr', 4, 66.),
            ('chr', 5, 66.), ('chr', 6, 66.), ('chr', 7, 69.), ('chr', 8, 70.),
            ('chr', 9, 71.), ('chr', 10, 71.), ('chr', 11, 71.),
            ('chr', 12, 71.), ('chr', 13, 71.), ('chr', 14, 71.),
            ('chr', 15, 71.), ('chr', 16, 71.), ('chr', 17, 71.),
            ('chr', 18, 71.), ('chr', 19, 73.), ('chr', 20, 73.),
            ('chr', 21, 73.), ('chr', 22, 73.), ('chr', 23, 73.),
            ('chr', 24, 73.), ('chr', 25, 73.), ('chr', 26, 74.),
            ('chr', 27, 75.), ('chr', 28, 75.), ('chr', 29, 75.),
            ('chr', 30, 75.), ('chr', 31, 76.),
        ]
        walker = wiggelen.walk(open_('fixedstep-without-step.wig'))
        # Compare whole sequences: zip() would stop at the shorter input and
        # let a walk that yields too few items pass.
        assert_equal(c, list(walker))
        assert_raises(StopIteration, next, walker)
def find_max(regions, wig):
    """
    Find the maximum value, and its position, within each given region.

    The wig track is walked once (through `fill(walk(wig))`). For every
    region the score sum is recomputed from the track itself rather than
    trusting the sum carried over from region merging.

    Assumes regions are ordered and non-overlapping. The chromosome reported
    by the walker is ignored, so only positions are matched against regions.

    :arg regions: Sequence of `(start, end, score_sum)` tuples. It is only
        read, never modified.
    :arg wig: Wig track handle, passed on to `walk`.

    :return: List of `(start, end, new_score_sum, max_value, max_position)`
        tuples, one per region that was completed during the walk. On ties
        the last position holding the maximum is reported. An empty
        `regions` gives an empty list.
    """
    regions_with_max = []

    # Iterate instead of popping from the front: the caller's list stays
    # intact and an empty input needs no special indexing.
    remaining = iter(regions)
    current = next(remaining, None)
    if current is None:
        return regions_with_max
    start, end, _ = current

    # None (not -1) marks "no value seen yet", so a real value of -1 or
    # lower can still be recorded as a maximum.
    max_value = None
    max_position = None
    new_score_sum = 0
    for chrom, position, value in fill(walk(wig)):
        # Filled-in positions carry no value (None); they can occur inside
        # a region that was merged across a gap and must not be summed.
        if value is not None and start <= position <= end:
            new_score_sum += value
            if max_value is None or value >= max_value:
                max_value = value
                max_position = position
        if position >= end and max_value is not None:
            # position is at or past the region end and a max was recorded
            regions_with_max.append(
                (start, end, new_score_sum, max_value, max_position))
            # grab new region and reset max
            current = next(remaining, None)
            if current is None:  # no more regions
                break
            start, end, _ = current
            max_value = None
            max_position = None
            new_score_sum = 0

    return regions_with_max
Beispiel #5
0
 def test_walk_complex(self):
     """
     Walk over a complex track.

     Smoke test: the track is consumed to the end and no values are
     compared, so it only fails if walking raises.
     """
     track = open_('complex.wig')
     for _item in wiggelen.walk(track):
         continue
Beispiel #6
0
    def test_store_index(self):
        """
        Walk over a track after the index has been made.

        The track is walked once with `force_index=True` and then once more
        without it; both walks must yield exactly the expected items.
        """
        values = [(2, 392.0), (3, 408.0), (4, 420.0), (5, 452.0), (7, 466.0),
                  (8, 474.0), (9, 479.0)]
        b = [(r, p, v) for r in ('1', '13', 'MT') for (p, v) in values]

        walker = wiggelen.walk(open_('b.wig'), force_index=True)
        # Compare whole sequences: zip() would stop at the shorter input and
        # let a walk that yields too few items pass.
        assert_equal(b, list(walker))
        assert_raises(StopIteration, next, walker)

        walker = wiggelen.walk(open_('b.wig'))
        assert_equal(b, list(walker))
        assert_raises(StopIteration, next, walker)
def read_wig(filename, norm=False):
	"""
	Read a wig file into a dictionary mapping position to value.

	The track is read through `fill(walk(...))`. Region names are dropped,
	so positions from different regions share one key space.
	NOTE(review): presumably meant for single-region tracks; confirm.

	:arg filename: Path of the wig file to read.
	:arg norm: If true, divide every value by the median of all defined
		values. Positions without a value (None) are left out of the median
		and stay None in the result.

	:return: Dictionary of position -> value (normalized when `norm` is set).
	"""
	# Close the file deterministically; the walk is consumed inside the
	# block because the dictionary is built while the handle is open.
	with open(filename) as handle:
		wig_dict = {position: value for region, position, value in fill(walk(handle))}
	if not norm:
		return wig_dict

	# Parenthesised form prints the same text on Python 2 and Python 3.
	print("Normalizing...")
	# Materialise a list: NumPy cannot take a Python 3 dict view directly,
	# and undefined positions must not take part in the median.
	defined = [value for value in wig_dict.values() if value is not None]
	wig_median = float(np.median(defined))
	return {
		position: (None if value is None else value / wig_median)
		for position, value in wig_dict.items()
	}
Beispiel #8
0
 def test_walk_single_region(self):
     """
     Walk over a track with a single region.

     The walk must yield exactly the expected items, no more, no fewer.
     """
     c = [('MT', 1, 364.0), ('MT', 6, 435.0), ('MT', 10, 485.0)]
     walker = wiggelen.walk(open_('c.wig'))
     # Compare whole sequences: zip() would stop at the shorter input and
     # let a walk that yields too few items pass.
     assert_equal(c, list(walker))
     assert_raises(StopIteration, next, walker)
Beispiel #9
0
 def test_walk_fixed_step(self):
     """
     Walk over a fixed step wiggle track.

     The walk must yield exactly the expected items, no more, no fewer.
     """
     c = [('chr8', 1, 11), ('chr8', 2, 11), ('chr8', 6, 33),
          ('chr8', 7, 33), ('chr8', 11, 44), ('chr8', 12, 44)]
     walker = wiggelen.walk(open_('fixedstep.wig'))
     # Compare whole sequences: zip() would stop at the shorter input and
     # let a walk that yields too few items pass.
     assert_equal(c, list(walker))
     assert_raises(StopIteration, next, walker)
Beispiel #10
0
 def test_walk_single_region(self):
     """
     Walk over a track with a single region.

     The walk must yield exactly the expected items, no more, no fewer.
     """
     c = [('MT', 1, 364.0),
          ('MT', 6, 435.0),
          ('MT', 10, 485.0)]
     walker = wiggelen.walk(open_('c.wig'))
     # Compare whole sequences: zip() would stop at the shorter input and
     # let a walk that yields too few items pass.
     assert_equal(c, list(walker))
     assert_raises(StopIteration, next, walker)
Beispiel #11
0
 def test_sort_multiple_regions(self):
     """
     Walk over a track with multiple regions and index.

     The track is walked with `force_index=True` and must yield exactly the
     expected items, region by region in the order '1', '13', 'MT'.
     """
     values = [(2, 392.0), (3, 408.0), (4, 420.0), (5, 452.0), (7, 466.0),
               (8, 474.0), (9, 479.0)]
     b = [(r, p, v) for r in ('1', '13', 'MT') for (p, v) in values]
     walker = wiggelen.walk(open_('b.wig'), force_index=True)
     # Compare whole sequences: zip() would stop at the shorter input and
     # let a walk that yields too few items pass.
     assert_equal(b, list(walker))
     assert_raises(StopIteration, next, walker)
def main(infile, threshold, merge_dist, min_width, strand, outfile):
    """
    Call regions of a wig track whose values reach a threshold and write
    them out through `write_bed`.

    Steps: call runs of consecutive positions with `value >= threshold`,
    drop runs narrower than `min_width`, merge the remainder with
    `merge_regions`, recompute score sum and maximum per region with
    `find_max`, and hand the result to `write_bed`.

    The region name reported by the walker is not used, so runs are
    delimited by values only.
    NOTE(review): presumably meant for single-chromosome tracks; confirm.

    :arg infile: Path of the wig file; it is read twice.
    :arg threshold: Minimum value for a position to belong to a region.
    :arg merge_dist: Passed to `merge_regions`.
    :arg min_width: Minimum width (`end - start + 1`) of a called region.
    :arg strand: Passed to `write_bed`.
    :arg outfile: Passed to `write_bed`.
    """
    # list of tuples, (start, end, total_exp)
    called_regions = []

    start = None
    end = None
    total_exp = 0

    # fill function steps through every position, returns None if position
    # not in original wig file
    print("Calling preliminary regions...")
    with open(infile) as wig:
        for region, position, value in fill(walk(wig)):
            if value is not None and value >= threshold:
                # value reaches threshold: open a region or extend it
                if start is None:
                    start = position
                total_exp += value
                end = position
                continue
            # Missing value or value below threshold ends any open region.
            # `end` (not the running sum) marks an open region, so regions
            # whose values sum to zero or less are not silently dropped.
            if end is not None:
                called_regions.append((start, end, total_exp))
            start = None
            end = None
            total_exp = 0

    if end is not None:  # finished iterating but one last region
        called_regions.append((start, end, total_exp))

    print("Filtering out regions smaller than minimum width...")
    # filter out regions that are below minimum width
    filtered_regions = [
        x for x in called_regions if x[1] - x[0] + 1 >= min_width
    ]

    print("Merging regions...")
    merged_regions = merge_regions(filtered_regions, merge_dist)

    # find max per region and re-do score sum; this needs a second pass over
    # the wig file
    print("Finding region max and calculating total score...")
    if merged_regions:
        with open(infile) as wig:
            regions_with_max = find_max(merged_regions, wig)
    else:
        # Nothing was called: skip the second pass instead of handing an
        # empty region list to find_max.
        regions_with_max = []

    write_bed(regions_with_max, strand, outfile)
Beispiel #13
0
 def test_walk_fixed_step(self):
     """
     Walk over a fixed step wiggle track.

     The walk must yield exactly the expected items, no more, no fewer.
     """
     c = [('chr8', 1, 11),
          ('chr8', 2, 11),
          ('chr8', 6, 33),
          ('chr8', 7, 33),
          ('chr8', 11, 44),
          ('chr8', 12, 44)]
     walker = wiggelen.walk(open_('fixedstep.wig'))
     # Compare whole sequences: zip() would stop at the shorter input and
     # let a walk that yields too few items pass.
     assert_equal(c, list(walker))
     assert_raises(StopIteration, next, walker)
Beispiel #14
0
 def test_sort_multiple_regions(self):
     """
     Walk over a track with multiple regions and index.

     The track is walked with `force_index=True` and must yield exactly the
     expected items, region by region in the order '1', '13', 'MT'.
     """
     values = [(2, 392.0),
               (3, 408.0),
               (4, 420.0),
               (5, 452.0),
               (7, 466.0),
               (8, 474.0),
               (9, 479.0)]
     b = [(r, p, v) for r in ('1', '13', 'MT') for (p, v) in values]
     walker = wiggelen.walk(open_('b.wig'), force_index=True)
     # Compare whole sequences: zip() would stop at the shorter input and
     # let a walk that yields too few items pass.
     assert_equal(b, list(walker))
     assert_raises(StopIteration, next, walker)
Beispiel #15
0
    def test_walk_fixed_step_without_step(self):
        """
        Walk over a fixed step wiggle track without `step` arguments.

        According to the spec, `fixedStep` definitions require the `step`
        argument. However, there seems to be real-world data where it is
        missing and the UCSC Genome Browser can still work with it.

        So we also support it.

        Issue: https://github.com/martijnvermaat/wiggelen/issues/1
        """
        c = [('chr', 1, 64.),
             ('chr', 2, 64.),
             ('chr', 3, 65.),
             ('chr', 4, 66.),
             ('chr', 5, 66.),
             ('chr', 6, 66.),
             ('chr', 7, 69.),
             ('chr', 8, 70.),
             ('chr', 9, 71.),
             ('chr', 10, 71.),
             ('chr', 11, 71.),
             ('chr', 12, 71.),
             ('chr', 13, 71.),
             ('chr', 14, 71.),
             ('chr', 15, 71.),
             ('chr', 16, 71.),
             ('chr', 17, 71.),
             ('chr', 18, 71.),
             ('chr', 19, 73.),
             ('chr', 20, 73.),
             ('chr', 21, 73.),
             ('chr', 22, 73.),
             ('chr', 23, 73.),
             ('chr', 24, 73.),
             ('chr', 25, 73.),
             ('chr', 26, 74.),
             ('chr', 27, 75.),
             ('chr', 28, 75.),
             ('chr', 29, 75.),
             ('chr', 30, 75.),
             ('chr', 31, 76.)]
        walker = wiggelen.walk(open_('fixedstep-without-step.wig'))
        # Compare whole sequences: zip() would stop at the shorter input and
        # let a walk that yields too few items pass.
        assert_equal(c, list(walker))
        assert_raises(StopIteration, next, walker)
Beispiel #16
0
    return wig_sum


# Script entry point: reads a BED file plus one wig track per strand.
# NOTE(review): this excerpt ends inside the BED loop; what is done with the
# parsed fields and with `outfile` is not visible here.
if __name__ == '__main__':

    # Four positional command-line arguments, all file paths/names.
    parser = argparse.ArgumentParser()
    parser.add_argument('bed', help='bed file')
    parser.add_argument('plus_wig', help='wig file for plus strand')
    parser.add_argument('minus_wig', help='wig file for minus strand')
    parser.add_argument('output_name', help='name of output file')

    args = parser.parse_args()

    # Load each strand's track into a position -> value dictionary. The
    # region name is discarded, so equal positions in different regions
    # overwrite each other.
    # NOTE(review): presumably single-chromosome input; confirm.
    # NOTE(review): the wig handles opened here are never explicitly closed.
    plus_wig_dict = {
        position: value
        for region, position, value in fill(walk(open(args.plus_wig)))
    }
    minus_wig_dict = {
        position: value
        for region, position, value in fill(walk(open(args.minus_wig)))
    }

    # Opened without a context manager; no close is visible in this excerpt.
    outfile = open(args.output_name, 'w')

    with open(args.bed) as infile:
        for line in infile:
            # First four whitespace-separated columns of the BED line.
            fields = line.strip().split()
            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])
            name = fields[3]
Beispiel #17
0
import sys
import wiggelen

# Command-line arguments: path of the wiggle file and the coverage threshold.
wig_fname = sys.argv[1]
threshold = int(sys.argv[2])	# the lowest coverage of a high-coverage region.

# Walk the wiggle file; iterating yields (reference, position, value) tuples.
# NOTE(review): the earlier comment here mentioned 'samtools depth' output,
# but the input is parsed with wiggelen.walk. The file handle is never
# explicitly closed.

wig_file = wiggelen.walk(open(wig_fname))


# Two-state scan ('LOW' / 'HIGH'), starting in 'LOW'.

state = 'LOW'
reference_name = None
# Search and print regions.
for reference, position, value in wig_file:
	
	if state == 'LOW':
		# First position reaching the threshold opens a one-position region.
		if value >= threshold:
			reference_name = reference
			beg = position
			end = position
			state = 'HIGH'
	
	elif state == 'HIGH':
		
		# The next reference has started: print the open region as
		# tab-separated reference, begin, end and go back to 'LOW'.
		if reference_name != reference:
			print(str(reference_name) + '\t' + str(beg) + '\t' + str(end))
			state = 'LOW'
Beispiel #18
0
    ofile = output_area + '/' + base + '.covered_bases.tsv'

    of = open(ofile, 'wt')
    L = fbed.readline().strip().split()
    # assume order of bed and wig both in chromosome position order
    n0 = 0
    n1 = 0
    pp1 = 0
    pp2 = 0
    c0 = 'A'
    p1 = genome_pos(L[0], L[1])
    p2 = genome_pos(L[0], L[2])
    n = 0
    LT = LT + p2 - p1

    for x in wiggelen.walk(fw):
        n = n + 1
        wp = genome_pos(x[0], x[1])
        if wp > p2:  # write out interval line, read next interval (pp1,pp2,p1,p2), reset n1,n2
            of.write('%s\t%s\t%d\t%d\t%d\n' %
                     (L[0], L[1], int(L[2]) - int(L[1]), n0, n1))
            if (L[0] != c0):
                print('%s\t%d' % (L[0], n))
                c0 = L[0]

            N0 = N0 + n0
            N1 = N1 + n1
            n0 = 0
            n1 = x[2]
            pp1 = p1
            pp2 = p2
Beispiel #19
0
	Modified by: Zaka Yuen, JCSMR, ANU
	Created on Nov 2019

	Tombo can only output methylation scores in a wig format.

	This script is to:
	-convert a wiggle format to a text format containing per-site scores for downstream analyses
"""

# Positive strand
# Parallel lists collecting region, position and value of every walked item.
r=[]
p=[]
v=[]

# First snakemake input is walked as the positive-strand wig file.
with open(snakemake.input[0],'r') as fh:
    for region, position, value in wiggelen.walk(fh):
        r.append(region)
        p.append(position)
        v.append(value)
    # One row per walked position; every row is tagged with strand "+".
    d_plus = pd.DataFrame(list(zip(r,p,v)), columns =['names', 'start','values'])
    d_plus["strand"]="+"

# Negative strand
# Same collection scheme, kept in separate lists.
r_m=[]
p_m=[]
v_m=[]

# Second snakemake input is walked as the negative-strand wig file.
with open(snakemake.input[1],'r') as fh:
    for region, position, value in wiggelen.walk(fh):
        r_m.append(region)
        p_m.append(position)