def main( args ):
    """Convert a tab-delimited VarScan output file into a joint counts file.

    Reads ``args.varscan_file_name`` with ``csv.DictReader`` and, for every
    row, builds an entry of the form::

        [ position, ref, var, var,
          normal_reads1, normal_reads2, tumor_reads1, tumor_reads2 ]

    Entries are grouped by the ``chrom`` column and handed to ``write_rows``
    in batches, targeting a ``JointCountsFile`` opened for writing at
    ``args.jcnt_file_name``.  Each entry is also echoed to stdout as a
    tab-separated line.

    Both the input file and the joint counts file are closed even when an
    exception interrupts the conversion.

    Parameters
    ----------
    args : object exposing ``varscan_file_name`` and ``jcnt_file_name``
        attributes (presumably an argparse namespace -- confirm with caller).
    """
    # Keep a handle on the input file so it can be closed explicitly; passing
    # an anonymous open() straight to DictReader would leak the descriptor.
    varscan_file = open( args.varscan_file_name )

    try:
        varscan_reader = csv.DictReader( varscan_file, delimiter='\t' )

        jcnt_file = JointCountsFile( args.jcnt_file_name, 'w' )

        try:
            rows = {}
            i = 0

            for row in varscan_reader:
                # An empty variant column is replaced by the placeholder 'N'.
                if row['var'] == '':
                    row['var'] = 'N'

                chr_name = row['chrom']

                if chr_name not in rows:
                    rows[chr_name] = []

                chr_coord = int( row['position'] )

                normal_ref_counts = int( row['normal_reads1'] )
                tumour_ref_counts = int( row['tumor_reads1'] )

                normal_non_ref_counts = int( row['normal_reads2'] )
                tumour_non_ref_counts = int( row['tumor_reads2'] )

                normal_counts = ( normal_ref_counts, normal_non_ref_counts )
                tumour_counts = ( tumour_ref_counts, tumour_non_ref_counts )

                ref_base = row['ref']

                # The input has a single 'var' column, so the same base is
                # used as the non-reference base for both samples.
                normal_non_ref_base = row['var']
                tumour_non_ref_base = row['var']

                jcnt_entry = [ chr_coord, ref_base, normal_non_ref_base, tumour_non_ref_base ]
                jcnt_entry.extend( normal_counts )
                jcnt_entry.extend( tumour_counts )

                rows[chr_name].append( jcnt_entry )

                # Single-argument print: emits the same text whether parsed as
                # a Python 2 print statement or a Python 3 print call.
                print( "\t".join( [str( x ) for x in jcnt_entry] ) )

                i += 1

                # Flush the buffered rows every 10,000 entries.
                if i >= 1e4:
                    # Progress message: chromosome and coordinate of the
                    # last entry buffered before the flush.
                    print( "%s %s" % ( chr_name, chr_coord ) )

                    write_rows( jcnt_file, rows )

                    rows = {}
                    i = 0

            # Last call to write remaining rows.
            write_rows( jcnt_file, rows )
        finally:
            jcnt_file.close()
    finally:
        varscan_file.close()
def main( args ):
    """Convert a (optionally bzip2-compressed) mpileup file into a joint counts file.

    The input at ``args.mpileup_file_name`` is opened with ``bz2.BZ2File``
    when ``args.bzip2`` is truthy and with the built-in ``open`` otherwise,
    then wrapped by ``get_reader``.  For each row that passes the depth
    filters an entry of the form::

        [ chr_coord, ref_base, normal_non_ref_base, tumour_non_ref_base,
          <normal counts...>, <tumour counts...> ]

    is buffered under its chromosome name and periodically handed to
    ``write_rows`` together with a ``JointCountsFile`` opened for writing at
    ``args.jcnt_file_name``.

    Both files are closed even when an exception interrupts the conversion.

    Parameters
    ----------
    args : object exposing ``bzip2``, ``mpileup_file_name``,
        ``jcnt_file_name``, ``min_depth`` and ``min_qual`` attributes
        (presumably an argparse namespace -- confirm with caller).
    """
    if args.bzip2:
        mpileup_file = bz2.BZ2File( args.mpileup_file_name )
    else:
        mpileup_file = open( args.mpileup_file_name )

    try:
        reader = get_reader( mpileup_file )

        jcnt_file = JointCountsFile( args.jcnt_file_name, 'w' )

        try:
            rows = {}
            i = 0

            for row in reader:
                chr_name = row['chr_name']

                # NOTE(review): the chromosome key is registered before the
                # depth filters below, so write_rows can receive an empty list
                # for a chromosome whose buffered rows were all skipped.  Kept
                # as-is to preserve existing behaviour -- confirm it is intended.
                if chr_name not in rows:
                    rows[chr_name] = []

                chr_coord = int( row['chr_coord'] )

                normal_depth = int( row['normal_depth'] )
                tumour_depth = int( row['tumour_depth'] )

                ref_base = row['ref_base'].upper()

                # Skip lines below coverage threshold.
                if normal_depth < args.min_depth or tumour_depth < args.min_depth:
                    continue

                # get_bases / get_counts are defined elsewhere; presumably they
                # apply the quality cut-off and tally ref / non-ref bases --
                # verify against their definitions.
                normal_bases = get_bases( ref_base,
                                          row['normal_call_string'],
                                          row['normal_base_qual_string'],
                                          args.min_qual )

                tumour_bases = get_bases( ref_base,
                                          row['tumour_call_string'],
                                          row['tumour_base_qual_string'],
                                          args.min_qual )

                normal_non_ref_base, normal_counts = get_counts( ref_base, normal_bases )
                tumour_non_ref_base, tumour_counts = get_counts( ref_base, tumour_bases )

                # Check again for lines below read depth. The first check above
                # speeds things up, though redundant.
                d_N = normal_counts[0] + normal_counts[1]
                d_T = tumour_counts[0] + tumour_counts[1]

                if d_N < args.min_depth or d_T < args.min_depth:
                    continue

                jcnt_entry = [ chr_coord, ref_base, normal_non_ref_base, tumour_non_ref_base ]
                jcnt_entry.extend( normal_counts )
                jcnt_entry.extend( tumour_counts )

                rows[chr_name].append( jcnt_entry )

                i += 1

                # Flush the buffered rows every 100,000 accepted entries.
                if i >= 1e5:
                    # Single-argument print: emits the same text whether
                    # parsed as a Python 2 print statement or a Python 3
                    # print call.
                    print( "%s %s" % ( chr_name, chr_coord ) )

                    write_rows( jcnt_file, rows )

                    rows = {}
                    i = 0

            # Last call to write remaining rows.
            write_rows( jcnt_file, rows )
        finally:
            jcnt_file.close()
    finally:
        mpileup_file.close()