Esempio n. 1
0
def main(argv):
    """Print ``samtools mpileup`` output for the neighbourhood of each SNP.

    SNPs are read from a hard-coded GFF file. For every SNP, samtools is run
    on the BAM file for the region spanning one position before to one
    position after the SNP, using the per-chromosome reference FASTA whose
    name is derived from the last character of the contig name. When
    samtools produces output, a header line of the form
    ``<contig>:<start>-<end>:<refbase><mutation>`` followed by the pileup
    text is written to stdout.

    Args:
        argv: Command-line arguments; ``argv[0]`` is the path to the BAM file.

    Raises:
        IndexError: If ``argv`` is empty.
        OSError: If the GFF file cannot be opened or samtools cannot be run.
    """
    bamfile = argv[0]

    snp_gff = "/home/schudoma/projects/ngs/ped-0-snps-intergenic.gff"
    genome_path = "/home/schudoma/projects/ngs/tair10/TAIR10_chr%c.fas"

    # Materialise the SNPs inside the ``with`` block so the GFF handle is
    # closed deterministically, even if the helper yields records lazily.
    with open(snp_gff) as gff_handle:
        snps = list(gff_helpers.read_snp_from_gff(gff_handle))

    base_cmd = ["samtools", "mpileup", "-f"]
    for snp in snps:
        # Each record is indexed as (contig, position, <unused>, refbase,
        # mutation); index 2 is not read here.
        contig, position, ref_base, mutation = snp[0], snp[1], snp[3], snp[4]
        region = "%s:%i-%i" % (contig, position - 1, position + 1)

        # The reference FASTA is selected by the contig's last character.
        genome_ref = genome_path % contig[-1]
        cmd = base_cmd + [genome_ref, "-r", region, bamfile]

        # Text mode keeps the captured output a ``str`` on Python 3 as well.
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, universal_newlines=True)
        stdout_text, stderr_text = p.communicate()

        # Only a non-zero exit status is reported, not stderr in general,
        # so diagnostics emitted by successful runs stay suppressed.
        if p.returncode != 0:
            sys.stderr.write("samtools mpileup failed for %s (exit code %i): %s\n"
                             % (region, p.returncode, stderr_text.strip()))

        output = stdout_text.strip()
        if output:
            sys.stdout.write("%s:%c%c\n" % (region, ref_base, mutation))
            sys.stdout.write("%s\n" % output)
            sys.stdout.flush()
Esempio n. 2
0
def main(argv):
    """Report read support for each SNP in a BAM file as ';'-separated rows.

    Writes a header row to stdout, then one row per SNP that has at least
    one usable read: contig, position, reference base, mutation, total
    reads, and the count and fraction of reads supporting the mutation and
    the reference base. A final summary line gives the number of SNPs and
    how many were covered. ``FIND_GENES.count_bases`` is also handed an
    open ``READS_CHECK.dat`` file in the working directory.

    Args:
        argv: Command-line arguments; ``argv[0]`` is the BAM file and
            ``argv[1]`` the GFF file describing the SNPs.

    Returns:
        None.
    """
    samfile = pysam.Samfile(argv[0], 'rb')
    try:
        # Materialise the SNPs inside the ``with`` block so the GFF handle
        # is closed deterministically; len() is needed for the summary.
        with open(argv[1]) as gff_handle:
            snps = list(gff_helpers.read_snp_from_gff(gff_handle))

        count_snps = 0
        print(';'.join(['contig', 'position', 'refbase_(Col)', 'mutation_(Ped)',
                        'total_reads', '#support_Ped', 'fr_Ped',
                        '#support_Col', 'fr_Col']))

        with open('READS_CHECK.dat', 'wb') as reads_out:
            for snp in snps:
                # Each record is indexed as (contig, position, <unused>,
                # refbase, mutation); index 2 is not read here.
                contig, position = snp[0], snp[1]
                ref_allele, alt_allele = snp[3], snp[4]

                # position - 1 because pysam pileup is 0-based
                basecount = FIND_GENES.count_bases(samfile, contig,
                                                   position - 1, reads_out)

                refbase = basecount.get(ref_allele, 0.0)
                snpbase = basecount.get(alt_allele, 0.0)

                # The 'bad' entry is excluded from the usable-read total.
                # NOTE(review): assumes count_bases always sets 'bad';
                # a missing key raises KeyError here, as before.
                total_reads = sum(basecount.values()) - basecount['bad']
                if total_reads <= 0:
                    continue

                line = [contig, position, ref_allele, alt_allele,
                        total_reads,
                        snpbase, float(snpbase) / total_reads,
                        refbase, float(refbase) / total_reads]
                print(';'.join(map(str, line)))
                count_snps += 1

        # Guard against an empty SNP list, which previously raised
        # ZeroDivisionError when computing the covered fraction.
        covered_fraction = float(count_snps) / len(snps) if snps else 0.0
        print('# Total SNPs: %i Covered: %i (%.3f)'
              % (len(snps), count_snps, covered_fraction))
    finally:
        samfile.close()

    # Flush rather than close: the output stream is sys.stdout, and closing
    # it would break any later printing by the caller.
    sys.stdout.flush()
    return None