def main(argv):
    """Run the pileup pipeline in one of two modes, selected by len(argv).

    MODE1 (more than three arguments): generate per-transcript results from
    pileup counts.
        argv[0]   transcript/snp data file
        argv[1]   sample name {Col,Ped} (accepted, but not used in this mode)
        argv[2]   output prefix (output is <prefix>_<DATATYPE>.pickled)
        argv[3:]  one or more BAM files
    Writes <prefix>_SNPDATA.pickled, <prefix>_TRANSCRIPTDATA.pickled and
    <prefix>_READCHECKLIST.pickled.

    MODE2 (three arguments or fewer): summarise per-transcript results.
        argv[0]   name of a <arbitrary>_TRANSCRIPTDATA.pickled file
        argv[1]   sample name {Col,Ped}
    The output prefix handed to write_data() is argv[0] with the trailing
    'TRANSCRIPTDATA.pickled' removed (any preceding '_' is kept).

    Raises IndexError in MODE2 when fewer than two arguments are given.
    """
    if len(argv) > 3:
        sys.stderr.write('%s: MODE1\n' % get_timestamp())
        with open(argv[0]) as transcript_file:
            transcript_d, snp_d = read_transcript_data(transcript_file)
        prefix = argv[2]

        # Shared across all BAM files; process_pileups() presumably records
        # the reads it has already counted here -- TODO confirm.
        read_checklist = set()
        for bam_fn in argv[3:]:
            sys.stderr.write('%s: Processing file %s...\n'
                             % (get_timestamp(), bam_fn))
            bam_file = pysam.Samfile(bam_fn, 'rb')
            try:
                process_pileups(bam_file, snp_d, read_checklist)
            finally:
                # Release the handle even if pileup processing fails.
                bam_file.close()

        # Explicit context managers guarantee the pickles are flushed and
        # closed instead of relying on garbage collection.
        outputs = (
            (snp_d, '_SNPDATA.pickled'),
            (transcript_d, '_TRANSCRIPTDATA.pickled'),
            (read_checklist, '_READCHECKLIST.pickled'),
        )
        for payload, suffix in outputs:
            with open(prefix + suffix, 'wb') as pickled_out:
                pickle.dump(payload, pickled_out)
    else:
        sys.stderr.write('%s: MODE2\n' % get_timestamp())
        sys.stderr.write('%s: Loading data from %s.\n'
                         % (get_timestamp(), argv[0]))
        # NOTE(review): load_transcript_data() presumably unpickles this
        # file; only feed it trusted input.
        with open(argv[0], 'rb') as pickled_in:
            transcript_d = load_transcript_data(pickled_in, sample=argv[1])
        sys.stderr.write('%s: Finished loading %s.\n'
                         % (get_timestamp(), argv[0]))

        # str.rstrip() removes a *set of characters*, not a suffix, and can
        # eat into the real prefix; cut the exact suffix off instead.
        suffix = 'TRANSCRIPTDATA.pickled'
        if argv[0].endswith(suffix):
            prefix = argv[0][:-len(suffix)]
        else:
            prefix = argv[0]
        write_data(transcript_d, sample=argv[1], prefix=prefix)
def _prefiltered_bam_name(bam_fn):
    """Return the output path for bam_fn: '<stem>.prefiltered.bam'."""
    suffix = '.bam'
    if bam_fn.endswith(suffix):
        stem = bam_fn[:-len(suffix)]
    else:
        # No '.bam' extension: append rather than reuse the input name, so
        # the output can never be the very file we are reading from.
        stem = bam_fn
    return stem + '.prefiltered' + suffix


def main(argv):
    """Write a prefiltered copy of each BAM file named on the command line.

    argv[0]   transcript/SNP data file
    argv[1:]  BAM files to filter

    For every BAM file, get_mapping_reads() selects read names based on the
    SNP data; each read whose qname is in that selection is copied to
    '<name>.prefiltered.bam' next to the input (same header, via template).
    """
    sys.stderr.write('%s: Reading transcript/SNP data\n' % get_timestamp())
    with open(argv[0]) as transcript_file:
        # Only the SNP dictionary is needed here.
        _, snp_d = read_transcript_data(transcript_file)

    for bam_fn in argv[1:]:
        sys.stderr.write('%s: Processing file %s...\n'
                         % (get_timestamp(), bam_fn))
        infile = pysam.Samfile(bam_fn, 'rb')
        try:
            mapping_reads = get_mapping_reads(infile, snp_d)
            outfile = pysam.Samfile(_prefiltered_bam_name(bam_fn), 'wb',
                                    template=infile)
            try:
                for read in infile.fetch():
                    if read.qname in mapping_reads:
                        outfile.write(read)
            finally:
                # Always close so the BAM output is finalised on disk.
                outfile.close()
        finally:
            infile.close()