def main(argv):
    """Run ``samtools mpileup`` around each SNP of the GFF and print the hits.

    Every SNP read from the hard-coded GFF file is turned into the region
    ``<contig>:<position - 1>-<position + 1>`` and piled up from the BAM file.
    When samtools writes anything to its stdout for that region, a header
    line ``<region>:<refbase><mutation>`` followed by the pileup text is
    written to this process's stdout.

    A non-zero samtools exit status is reported on stderr (together with
    whatever samtools wrote to its own stderr); processing then continues
    with the next SNP.

    Args:
        argv: argument list; ``argv[0]`` is the BAM file passed to samtools.

    NOTE(review): a second ``main`` is defined later in this file and
    replaces this one when the module is loaded -- confirm which entry point
    is meant to be live.
    """
    bamfile = argv[0]
    snp_gff = "/home/schudoma/projects/ngs/ped-0-snps-intergenic.gff"
    genome_path = "/home/schudoma/projects/ngs/tair10/TAIR10_chr%c.fas"
    base_cmd = ["samtools", "mpileup", "-f"]

    # Materialise the SNPs while the file is open so the handle is closed
    # deterministically, whether or not the reader is lazy.
    with open(snp_gff) as gff_in:
        snps = list(gff_helpers.read_snp_from_gff(gff_in))

    for snp in snps:
        # SNP tuple layout, per the original author's note:
        # (gffline[0], int(gffline[3]) + 1, int(gffline[4]),
        #  comments['refbase'], comments['mutation'])
        contig, position = snp[0], snp[1]
        region = "%s:%i-%i" % (contig, position - 1, position + 1)

        # The reference FASTA is selected by the last character of the
        # contig name.
        genome_ref = genome_path % contig[-1]
        cmd = base_cmd + [genome_ref, "-r", region, bamfile]

        # stderr is captured because samtools writes progress chatter there
        # (e.g. '<mpileup> Set max per-file depth to 8000').
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        stdout_data, stderr_data = proc.communicate()

        if proc.returncode != 0:
            # Surface failures instead of silently treating them as
            # "no coverage"; stdout handling below is unchanged.
            sys.stderr.write("samtools mpileup failed for %s (exit code %i)\n"
                             % (region, proc.returncode))
            sys.stderr.write("%s\n" % stderr_data.strip())

        output = stdout_data.strip()
        if output:
            sys.stdout.write("%s:%c%c\n" % (region, snp[3], snp[4]))
            sys.stdout.write("%s\n" % output)
            sys.stdout.flush()
def main(argv):
    """Print, for every covered SNP, the read support for both alleles.

    The SNPs are read from the GFF file ``argv[1]`` and looked up in the BAM
    file ``argv[0]``.  For each SNP with at least one usable read a
    ``;``-separated row is written to stdout:

        contig; position; refbase_(Col); mutation_(Ped); total_reads;
        #support_Ped; fr_Ped; #support_Col; fr_Col

    where ``total_reads`` is the sum of all base counts minus the ``'bad'``
    count, and the ``fr_*`` columns are the support counts divided by
    ``total_reads``.  A final ``# Total SNPs ...`` line summarises how many
    SNPs were covered.

    Side effects: ``READS_CHECK.dat`` is (re)created in the current working
    directory and handed to ``FIND_GENES.count_bases``.

    Args:
        argv: argument list; ``argv[0]`` is the BAM file, ``argv[1]`` the
            SNP GFF file.

    Returns:
        None.
    """
    out = sys.stdout
    samfile = pysam.Samfile(argv[0], 'rb')
    try:
        # Materialise the SNPs while the file is open so the handle is
        # closed deterministically and len() is available for the summary.
        with open(argv[1]) as gff_in:
            snps = list(gff_helpers.read_snp_from_gff(gff_in))

        header = ['contig', 'position', 'refbase_(Col)', 'mutation_(Ped)',
                  'total_reads', '#support_Ped', 'fr_Ped',
                  '#support_Col', 'fr_Col']
        out.write('%s\n' % ';'.join(header))

        count_snps = 0
        with open('READS_CHECK.dat', 'wb') as reads_out:
            for snp in snps:
                # SNP tuple layout, per the original author's note:
                # (gffline[0], int(gffline[3]) + 1, int(gffline[4]),
                #  comments['refbase'], comments['mutation'])
                contig, position = snp[0], snp[1]
                refbase, mutation = snp[3], snp[4]

                # position - 1 because pysam pileup is 0-based
                basecount = FIND_GENES.count_bases(samfile, contig,
                                                   position - 1, reads_out)
                n_ref = basecount.get(refbase, 0.0)
                n_snp = basecount.get(mutation, 0.0)
                total_reads = sum(basecount.values()) - basecount['bad']
                if total_reads <= 0:
                    continue

                row = [contig, position, refbase, mutation,
                       total_reads,
                       n_snp, float(n_snp) / total_reads,
                       n_ref, float(n_ref) / total_reads]
                out.write('%s\n' % ';'.join(map(str, row)))
                count_snps += 1

        # An empty SNP list used to raise ZeroDivisionError here.
        covered = float(count_snps) / len(snps) if snps else 0.0
        out.write('# Total SNPs: %i Covered: %i (%.3f)\n'
                  % (len(snps), count_snps, covered))
        # Flush rather than close: the stream is sys.stdout itself, and
        # closing it would break any later output from this process.
        out.flush()
    finally:
        samfile.close()
    return None