def test_pattern(self):
    """Check the pattern string returned when ``pattern_only=True``.

    Called without ``out_dir`` the pattern is expected to be bare; called
    with ``out_dir="/a"`` it is expected to be prefixed with that directory.
    """
    # assertEqual is used rather than the deprecated assertEquals alias,
    # which no longer exists in recent unittest versions.
    paths = bin_paths_from_fasta(self.fasta, pattern_only=True)
    self.assertEqual(paths, 'a.%s.*.bin')

    paths = bin_paths_from_fasta(self.fasta, pattern_only=True, out_dir="/a")
    self.assertEqual(paths, '/a/a.%s.*.bin')
def test_paths(self):
    """Check the list of bin-file path templates returned by default.

    With only the fasta argument, three templates are expected, in this
    order: the ``c``, ``t`` and ``methyltype`` files.
    """
    paths = bin_paths_from_fasta(self.fasta)
    # assertEqual is used rather than the deprecated assertEquals alias,
    # which no longer exists in recent unittest versions.
    expected = ['a.%s.c.bin', 'a.%s.t.bin', 'a.%s.methyltype.bin']
    self.assertEqual(paths, expected)
def parse_gsnap_sam(gsnap_f, ref_path, out_dir, paired_end):
    """Tabulate conversion counts from a GSNAP SAM file and write the results.

    Reads `gsnap_f` line by line, copies the ``@SQ`` header lines and every
    record that passes the mapping filter into a sibling file whose name has
    ``.gsnap.sam`` replaced by ``.sam``, and feeds each remaining alignment to
    ``_update_conversions``. Afterwards the counts are written with
    ``write_files``, the invoking command line is recorded in
    ``<out_dir>/cmd.ran`` and ``write_sam_commands`` is called.

    Parameters:
        gsnap_f: path of the SAM file to read; the name must contain
            ``.gsnap.sam`` so that the filtered output gets a distinct path.
        ref_path: path of the reference, handed to ``Fasta``.
        out_dir: directory that receives the count files and ``cmd.ran``.
        paired_end: when true, records whose column 7 is not ``"="`` are
            dropped; otherwise records whose flag equals 4 are dropped.

    Raises:
        ValueError: if the filtered-output path would equal `gsnap_f`
            (opening it for writing would truncate the input).
        AssertionError: if the reference slice for a read is empty.
    """
    import datetime

    sam_path = gsnap_f.replace(".gsnap.sam", ".sam")
    if sam_path == gsnap_f:
        # Without this guard the output open() below would truncate the very
        # file we are about to read, silently producing empty results.
        raise ValueError(
            "expected a '.gsnap.sam' file, got %r; refusing to overwrite "
            "the input with the filtered output" % (gsnap_f,))

    fa = Fasta(ref_path)
    # Only the c/t paths are needed here; the methyltype path is unused.
    fc, ft, _fmethyltype = bin_paths_from_fasta(fa.fasta_name, out_dir)
    counts = get_counts(fc, ft, fa)

    print >>sys.stderr, "tabulating methylation"
    # Context managers make sure both files are flushed and closed before
    # the count files and follow-up commands are written.
    with open(sam_path, "w") as gsnap_subset:
        with open(gsnap_f) as sam_in:
            for sline in sam_in:
                if sline.startswith("@SQ"):
                    print >>gsnap_subset, sline.strip()
                    continue

                line = sline.split("\t")
                sam_flag = int(line[1])
                if paired_end:
                    # the ends didn't map to same spot.
                    if line[6] != "=":
                        continue
                else:
                    # no reported alignments.
                    if sam_flag == 4:
                        continue

                print >>gsnap_subset, sline.rstrip("\n")

                seqid = line[2]
                aln_seq = line[9]
                read_length = len(aln_seq)
                bp0 = int(line[3]) - 1  # SAM positions are 1-based
                # XOR of flag bits 16 and 128 picks G->A instead of C->T.
                ga = ((sam_flag & 16) != 0) ^ (sam_flag & 128 != 0)
                insert_length = int(line[8])

                # NOTE(review): single-end SAM records normally carry 0 in
                # columns 8 and 9, so the two zero-filters below may drop
                # every read when paired_end is false -- confirm against
                # real GSNAP output.

                # both ends start at exactly the same place.
                if insert_length == 0:
                    continue
                # handle overlapping reads. one side has + insert, the
                # other is -; trim the read so the overlap is not counted
                # twice.
                if -read_length < insert_length < 0:
                    insert_length = abs(insert_length)
                    aln_seq = aln_seq[:-(read_length - insert_length)]
                    read_length = len(aln_seq)
                if line[7] == '0':
                    continue

                bp1 = bp0 + read_length
                ref_seq = (fa[seqid][bp0:bp1]).upper()
                letters = 'GA' if ga else 'CT'
                # The reference slice may be shorter than the read, so the
                # length is taken from what was actually fetched.
                read_length = len(ref_seq)
                assert read_length > 0, (bp0, bp1)
                _update_conversions(ref_seq, aln_seq, bp0, letters,
                                    counts[seqid]['c'], counts[seqid]['t'],
                                    50, read_length, line[5])

    write_files(fa.fasta_name, out_dir, counts)

    # Record when, where and how this run was invoked; closed before
    # write_sam_commands runs so nothing is left sitting in the buffer.
    with open(out_dir + "/cmd.ran", "w") as cmd:
        print >>cmd, "#date:", str(datetime.date.today())
        print >>cmd, "#path:", op.abspath(".")
        print >>cmd, " ".join(sys.argv)
    write_sam_commands(out_dir, fa, "methylcoder.gsnap")