class BamToDataConverter:
    """Build a ``Data`` object from a normal/tumor BAM pair and pickle it.

    The converter loads segments from a BED file, collects per-segment
    counts in a pool of worker processes, asks the ``Data`` object to
    compute its LOH fraction, and finally writes the ``Data`` object to
    ``<input_filename_base>.MixClone.input.pkl``.
    """

    def __init__(self, normal_bam_filename, tumor_bam_filename,
                 reference_genome_filename, input_filename_base, segments_bed,
                 min_depth=20, min_bqual=10, min_mqual=10, process_num=1):
        """Store the conversion settings and create an empty ``Data``.

        Args:
            normal_bam_filename: Path of the normal-sample BAM file.
            tumor_bam_filename: Path of the tumor-sample BAM file.
            reference_genome_filename: Path forwarded unchanged to
                ``process_by_segment``.
            input_filename_base: Prefix of the pickle written by
                ``convert``.
            segments_bed: BED file handed to ``Data.load_segments``.
            min_depth: Forwarded unchanged to ``process_by_segment``
                (presumably a minimum read depth -- confirm there).
            min_bqual: Forwarded unchanged to ``process_by_segment``
                (presumably a minimum base quality -- confirm there).
            min_mqual: Forwarded unchanged to ``process_by_segment``
                (presumably a minimum mapping quality -- confirm there).
            process_num: Upper bound on the number of worker processes;
                it is reduced to the segment count when larger.
        """
        self.normal_bam_filename = normal_bam_filename
        self.tumor_bam_filename = tumor_bam_filename
        self.reference_genome_filename = reference_genome_filename
        self.input_filename_base = input_filename_base
        self.segments_bed = segments_bed

        self.min_depth = min_depth
        self.min_bqual = min_bqual
        self.min_mqual = min_mqual

        self.process_num = process_num

        self.data = Data()

    def convert(self):
        """Run the whole pipeline and pickle ``self.data`` (protocol 2)."""
        self._load_segments()
        self._get_counts()
        self._get_LOH_frac()

        data_file_name = self.input_filename_base + '.MixClone.input.pkl'
        # The context manager closes the file even if pickling fails.
        with open(data_file_name, 'wb') as outfile:
            pkl.dump(self.data, outfile, protocol=2)

    def _load_segments(self):
        """Open both BAM files and let ``self.data`` load the segments.

        Both handles are closed on every exit path, including when the
        second open or ``load_segments`` raises.
        """
        normal_bam = pysam.Samfile(self.normal_bam_filename, 'rb')
        try:
            tumor_bam = pysam.Samfile(self.tumor_bam_filename, 'rb')
            try:
                message = 'Loading segments by {0}...'.format(
                    self.segments_bed)
                # Same bytes as the former ``print`` statement, but this
                # form parses under both Python 2 and Python 3.
                sys.stdout.write(message + '\n')
                sys.stdout.flush()
                self.data.load_segments(normal_bam, tumor_bam,
                                        self.segments_bed)
            finally:
                tumor_bam.close()
        finally:
            normal_bam.close()

    def _get_counts(self):
        """Fill ``paired_counts`` and ``BAF_counts`` on every segment.

        One argument tuple per segment is mapped over
        ``process_by_segment`` in a process pool; each result is unpacked
        as ``(paired_counts, BAF_counts)`` and stored on the segment with
        the same index.
        """
        seg_num = self.data.seg_num
        if seg_num <= 0:
            # Nothing to count.  Without this guard the pool size would be
            # clamped to 0 and ``Pool(processes=0)`` raises ValueError.
            return

        process_num = self.process_num
        if process_num > seg_num:
            # More workers than segments would only sit idle.
            process_num = seg_num

        segments = [self.data.segments[j] for j in range(seg_num)]
        args_list = [
            (seg.name, seg.chrom_name, seg.chrom_idx, seg.start, seg.end,
             self.normal_bam_filename, self.tumor_bam_filename,
             self.reference_genome_filename,
             self.min_depth, self.min_bqual, self.min_mqual)
            for seg in segments
        ]

        pool = Pool(processes=process_num)
        try:
            counts_tuple_list = pool.map(process_by_segment, args_list)
        finally:
            # Always release the worker processes, even when map() fails.
            pool.close()
            pool.join()

        for seg, counts in zip(segments, counts_tuple_list):
            paired_counts, BAF_counts = counts
            seg.paired_counts = paired_counts
            seg.BAF_counts = BAF_counts

    def _get_LOH_frac(self):
        """Delegate the LOH-fraction computation to ``self.data``."""
        self.data.get_LOH_frac()