Example #1
0
    def test_count(self):
        """Check the value returned by ``_update_conversions`` for "CT".

        The same reference/alignment pair is submitted twice, differing only
        in the seventh argument: with 10 the call must return 0, with 0 it
        must return 1.
        NOTE(review): the seventh argument is presumably a mismatch
        allowance -- confirm against ``_update_conversions``.
        """
        count = _update_conversions(self.ref, self.aln, 0, "CT", self.cc,
                                    self.tt, 10, len(self.ref), "")
        # No custom msg: unittest's default "first != second" text already
        # reports the offending count.
        self.assertEqual(count, 0)

        count = _update_conversions(self.ref, self.aln, 0, "CT", self.cc,
                                    self.tt, 0, len(self.ref), "")
        self.assertEqual(count, 1)
Example #2
0
 def test_gas(self):
     """Check which positions receive counts when converting "GA".

     After one update, every position with a non-zero ``cc`` entry must be
     a G in both the reference and the alignment, and every position with
     a non-zero ``tt`` entry must be a G in the reference and an A in the
     alignment.
     """
     _update_conversions(self.ref, self.aln, 0, "GA", self.cc, self.tt, 10,
                         len(self.ref), False)
     for c, t, r, a in zip(self.cc, self.tt, self.ref, self.aln):
         if c != 0:
             self.assertEqual(r, "G")
             self.assertEqual(a, "G")
         if t != 0:
             self.assertEqual(r, "G")
             self.assertEqual(a, "A")
Example #3
0
    def test_cts(self):
        """Check the ``cc``/``tt`` counts after one and two "CT" updates.

        The second, identical call is expected to double every count, i.e.
        the arrays are updated in place and accumulate across calls.
        """
        _update_conversions(self.ref, self.aln, 0, "CT", self.cc, self.tt, 10,
                            len(self.ref), False)

        # Plain ints compare equal to longs, so no ``L`` suffix is needed.
        self.assertEqual(self.cc.tolist(), [0, 0, 0, 1, 0, 0, 0, 0])
        self.assertEqual(self.tt.tolist(), [0, 1, 1, 0, 0, 0, 0, 0])

        _update_conversions(self.ref, self.aln, 0, "CT", self.cc, self.tt, 10,
                            len(self.ref), False)

        self.assertEqual(self.cc.tolist(), [0, 0, 0, 2, 0, 0, 0, 0])
        self.assertEqual(self.tt.tolist(), [0, 2, 2, 0, 0, 0, 0, 0])
Example #4
0
 def test_gas(self):
     """Check which positions receive counts when converting "GA".

     After one update, every position with a non-zero ``cc`` entry must be
     a G in both the reference and the alignment, and every position with
     a non-zero ``tt`` entry must be a G in the reference and an A in the
     alignment.
     """
     _update_conversions(self.ref, self.aln, 0, "GA", self.cc, self.tt, 10,
                         len(self.ref), "")
     for c, t, r, a in zip(self.cc, self.tt, self.ref, self.aln):
         if c != 0:
             self.assertEqual(r, "G")
             self.assertEqual(a, "G")
         if t != 0:
             self.assertEqual(r, "G")
             self.assertEqual(a, "A")
Example #5
0
    def test_cts(self):
        """Check the ``cc``/``tt`` counts after one and two "CT" updates.

        The second, identical call is expected to double every count, i.e.
        the arrays are updated in place and accumulate across calls.
        """
        _update_conversions(self.ref, self.aln, 0, "CT", self.cc, self.tt, 10,
                            len(self.ref), "")

        # Plain ints compare equal to longs, so no ``L`` suffix is needed.
        self.assertEqual(self.cc.tolist(), [0, 0, 0, 1, 0, 0, 0, 0])
        self.assertEqual(self.tt.tolist(), [0, 1, 1, 0, 0, 0, 0, 0])

        _update_conversions(self.ref, self.aln, 0, "CT", self.cc, self.tt, 10,
                            len(self.ref), "")

        self.assertEqual(self.cc.tolist(), [0, 0, 0, 2, 0, 0, 0, 0])
        self.assertEqual(self.tt.tolist(), [0, 2, 2, 0, 0, 0, 0, 0])
Example #6
0
    def test_count(self):
        """Check the value returned by ``_update_conversions`` for "CT".

        The same reference/alignment pair is submitted twice, differing only
        in the seventh argument: with 10 the call must return 0, with 0 it
        must return 1.
        NOTE(review): the seventh argument is presumably a mismatch
        allowance -- confirm against ``_update_conversions``.
        """
        count = _update_conversions(self.ref, self.aln, 0, "CT", self.cc,
                                    self.tt, 10, len(self.ref), False)
        # No custom msg: unittest's default "first != second" text already
        # reports the offending count.
        self.assertEqual(count, 0)

        count = _update_conversions(self.ref, self.aln, 0, "CT", self.cc,
                                    self.tt, 0, len(self.ref), False)
        self.assertEqual(count, 1)
Example #7
0
def parse_gsnap_sam(gsnap_f, ref_path, out_dir, paired_end):
    """Tabulate conversions from a GSNAP SAM file and write the results.

    Reads `gsnap_f` line by line. ``@SQ`` header lines and every retained
    alignment line are copied to a sibling file whose name is `gsnap_f` with
    ``.gsnap.sam`` replaced by ``.sam``. Each usable alignment is passed,
    together with the matching upper-cased reference slice, to
    ``_update_conversions`` along with that sequence's 'c' and 't' count
    arrays. Afterwards the counts go to ``write_files``, the invocation is
    recorded in ``<out_dir>/cmd.ran`` and ``write_sam_commands`` is called.

    Args:
        gsnap_f: path to the GSNAP output; must contain ``.gsnap.sam``.
        ref_path: path to the reference FASTA, opened with ``Fasta``.
        out_dir: directory that receives ``cmd.ran`` and is passed on to the
            helper functions.
        paired_end: when true, keep only lines whose mate reference column
            is ``=``; otherwise drop lines whose flag is exactly 4.

    Raises:
        ValueError: if `gsnap_f` does not contain ``.gsnap.sam``; the subset
            file would then be the input itself and opening it for writing
            would truncate the input.

    NOTE(review): header lines other than ``@SQ`` (e.g. ``@HD``/``@PG``) and
    blank lines are not handled and would fail while parsing the flag column.
    """
    import datetime

    subset_path = gsnap_f.replace(".gsnap.sam", ".sam")
    if subset_path == gsnap_f:
        raise ValueError("expected a '.gsnap.sam' file, got: %s" % gsnap_f)

    fa = Fasta(ref_path)
    fc, ft, fmethyltype = bin_paths_from_fasta(fa.fasta_name, out_dir)
    counts = get_counts(fc, ft, fa)

    sys.stderr.write("tabulating methylation\n")

    # Context managers guarantee both files are flushed and closed before the
    # later output steps run, even if a line fails to parse.
    with open(gsnap_f) as sam_in:
        with open(subset_path, "w") as gsnap_subset:
            for sline in sam_in:
                if sline.startswith("@SQ"):
                    gsnap_subset.write(sline.strip() + "\n")
                    continue

                line = sline.split("\t")
                sam_flag = int(line[1])
                if paired_end:
                    # mate reference (column 7) is not "=": the two ends
                    # did not map to the same sequence.
                    if line[6] != "=":
                        continue
                elif sam_flag == 4:
                    # no reported alignments.
                    continue

                # The line is kept in the subset even if it is skipped below.
                gsnap_subset.write(sline.rstrip("\n") + "\n")

                seqid = line[2]
                aln_seq = line[9]
                read_length = len(aln_seq)
                bp0 = int(line[3]) - 1  # SAM positions are 1-based.
                # G->A counting is used when exactly one of the flag bits
                # 16 (reverse strand) and 128 (last segment) is set.
                ga = ((sam_flag & 16) != 0) ^ ((sam_flag & 128) != 0)
                insert_length = int(line[8])

                # both ends start at exactly the same place.
                if insert_length == 0:
                    continue
                # handle overlapping reads. one side has + insert, the other
                # is -; keep only the first abs(insert_length) bases.
                if -read_length < insert_length < 0:
                    aln_seq = aln_seq[:abs(insert_length)]
                    read_length = len(aln_seq)
                # mate position column is 0.
                if line[7] == '0':
                    continue

                bp1 = bp0 + read_length
                ref_seq = (fa[seqid][bp0:bp1]).upper()

                letters = 'GA' if ga else 'CT'
                # The slice may be shorter than requested, so the length
                # passed on is that of the reference slice.
                read_length = len(ref_seq)
                assert read_length > 0, (bp0, bp1)
                _update_conversions(ref_seq, aln_seq, bp0, letters,
                                    counts[seqid]['c'], counts[seqid]['t'],
                                    50, read_length, line[5])

    write_files(fa.fasta_name, out_dir, counts)

    # Record when, where and how this run was invoked.
    with open(out_dir + "/cmd.ran", "w") as cmd:
        cmd.write("#date: %s\n" % datetime.date.today())
        cmd.write("#path: %s\n" % op.abspath("."))
        cmd.write(" ".join(sys.argv) + "\n")
    write_sam_commands(out_dir, fa, "methylcoder.gsnap")