def statAll(self):
    """Collect read statistics for every sample and write ``reads_stat.xls``.

    The method works in three phases:

    1. For each (compact, data_type, lib_method, sample) key in
       ``self.sample_struct`` a thread running ``self.stat_raw_reads`` is
       started.  New threads are only started while
       ``threading.activeCount()`` is below ``self.concurrency``.
    2. All threads registered in ``self.__active_threads`` that are still
       alive are joined.
    3. ``<split dir>/sam_barcode.all`` is parsed with ``parse_sam_all`` and
       one tab-separated row per entry is written to
       ``<QC dir>/reads_stat.xls``.

    Entries that ``self.check_keys`` does not find in ``self.sample_struct``
    get a short row whose statistics column is the literal ``None``.

    Raises:
        IOError/OSError: if the report file cannot be opened.
        ValueError: if a read counter or ``needed_reads`` is not an integer.
    """
    import time  # local import: only needed for the throttle sleep below

    report_path = '%s/reads_stat.xls' % self.path['QC']
    header = ('compact\tsample_name\tdata_type\tlib_method\traw_reads'
              '\tpandaseq_reads\tHQ_reads\tHQ_ratio\tTotal_ratio'
              '\tneeded_reads\tneed_to_reseq\n')

    # ``with`` guarantees the report is flushed and closed even when a
    # worker start, a join or a malformed row raises.
    with open(report_path, 'w') as out:
        out.write(header)

        # ``items()`` behaves the same as ``iteritems()`` here and also
        # works on Python 3.
        for compact, data_type_hash in self.sample_struct.items():
            for data_type, lib_method_hash in data_type_hash.items():
                for lib_method, sampleinfo in lib_method_hash.items():
                    for sample in sampleinfo:
                        t = threading.Thread(
                            target=self.stat_raw_reads,
                            args=(compact, data_type, lib_method, sample))
                        self.__active_threads.add(t)
                        t.start()
                        # Throttle: wait until a slot is free.  Sleeping
                        # instead of spinning stops the main thread from
                        # competing with the workers for the interpreter.
                        # NOTE(review): activeCount() includes the main
                        # thread, so concurrency must be >= 2 for this
                        # loop to ever exit -- confirm with callers.
                        while threading.activeCount() >= self.concurrency:
                            time.sleep(0.01)

        # Only join threads that are still alive and were registered by
        # this object; other live threads are left alone.
        for t in threading.enumerate():
            if t in self.__active_threads:
                t.join()

        sort_sample_file = '%s/sam_barcode.all' % self.path['split']
        for (compact, sample_name, _barcode_info, data_type,
             lib_method, needed_reads) in parse_sam_all(sort_sample_file):
            key_list = [compact, data_type, lib_method, sample_name]
            if not self.check_keys(key_list, self.sample_struct):
                # The original format string had six placeholders for five
                # values and raised TypeError; it now matches the values.
                out.write('%s\t%s\t%s\t%s\t%s\n' % (
                    compact, sample_name, data_type, lib_method, 'None'))
                continue

            item = self.sample_struct[compact][data_type][lib_method][sample_name]
            raw_reads = int(item['raw_reads'])
            hq_reads = int(item['HQ_reads'])
            # Float arithmetic: int / int truncates on Python 2, which made
            # the ratio 0 whenever HQ_reads < raw_reads.
            if raw_reads != 0:
                t_ratio = 100.0 * hq_reads / raw_reads
            else:
                t_ratio = 0.0

            # presumably MyList renders its items tab-separated so this one
            # field fills four header columns -- TODO confirm
            out_str = str(MyList((item['raw_reads'], item['pandaseq_reads'],
                                  item['HQ_reads'], item['HQ_ratio'])))
            need_to_reseq = int(needed_reads) - hq_reads
            out.write('%s\t%s\t%s\t%s\t%s\t%2.2f%%\t%s\t%s\n' % (
                compact, sample_name, data_type, lib_method,
                out_str, t_ratio, needed_reads, need_to_reseq))
def get_info(self):
    """Load per-compact data types and per-sample read targets.

    Parses ``<split dir>/sam_barcode.all`` with ``parse_sam_all`` and
    rebuilds two attributes from scratch:

    * ``self.compact_data_type`` maps each compact to the data_type of the
      first row seen for that compact.
    * ``self.needed_reads`` maps compact -> sample_name ->
      ``int(data_needed)``; a later row for the same sample overwrites the
      earlier value.

    When a later row of a compact carries a different data_type, a warning
    is written to stderr and the first data_type is kept.

    Raises:
        ValueError: if ``data_needed`` is not an integer.
    """
    self.compact_data_type = {}
    self.needed_reads = {}
    sam_barcode_file = '%s/sam_barcode.all' % self.path['split']
    for (compact, sample_name, _barcode_info, data_type,
         _lib_method, data_needed) in parse_sam_all(sam_barcode_file):
        if compact not in self.compact_data_type:
            self.compact_data_type[compact] = data_type
            self.needed_reads[compact] = {}
        elif self.compact_data_type[compact] != data_type:
            # Newline added so consecutive warnings do not run together.
            stderr.write('The compact %s has two diffrent data_type!\n' % compact)
        self.needed_reads[compact][sample_name] = int(data_needed)
def getSampleStruct(self):
    """Build the nested ``self.sample_struct`` lookup from ``sam_barcode.all``.

    ``self.sample_struct`` is reset and then filled as
    ``compact -> data_type -> lib_method -> sample_name -> counters``.
    The counters dict starts with zeroed ``pandaseq_reads``, ``HQ_reads``,
    ``HQ_ratio`` and ``raw_reads`` plus ``needed_reads`` taken from the row.
    A sample that appears again keeps its first counters dict.

    Every lib_method seen also gets a zero entry in ``self.total_reads``
    unless one already exists.
    """
    self.sample_struct = {}
    barcode_table = '%s/sam_barcode.all' % self.path['split']
    for row in parse_sam_all(barcode_table):
        (compact, sample_name, barcode_info,
         data_type, lib_method, needed_reads) = row

        # Walk down the three outer levels, creating each on first sight.
        by_type = self.sample_struct.setdefault(compact, {})
        by_lib = by_type.setdefault(data_type, {})
        samples = by_lib.setdefault(lib_method, {})

        # Build the counters only for a new sample, so needed_reads is
        # converted only then.
        if sample_name not in samples:
            samples[sample_name] = {
                'pandaseq_reads': 0,
                'HQ_reads': 0,
                'HQ_ratio': 0,
                'raw_reads': 0,
                'needed_reads': int(needed_reads),
            }

        if lib_method not in self.total_reads:
            self.total_reads[lib_method] = 0
def _create_samples(self):
    """Yield a ``WorkPerSample`` for every row of every usable barcode file.

    Each path in ``self.sam_barcode_files`` is passed to ``get_lib_method``;
    files for which it returns ``None`` are skipped.  For the remaining
    files a progress line is written to stdout and every row from
    ``parse_sam_all`` is turned into
    ``WorkPerSample(self.work_path, compact, sample_name, lib_method,
    data_type)``, using the lib_method stored in the row.  Falsy sample
    objects are not yielded.
    """
    for sam_barcode_file in self.sam_barcode_files:
        # The filename-derived value only decides whether the file is used.
        if get_lib_method(sam_barcode_file) is None:
            continue
        sys.stdout.write('sam_barcode_file: %s ... ok\n' % sam_barcode_file)
        # The row's own lib_method gets a distinct name so it no longer
        # rebinds the filename-derived value as the original loop did.
        for (compact, sample_name, _barcode_info, data_type,
             row_lib_method, _needed_data) in parse_sam_all(sam_barcode_file):
            sample = WorkPerSample(self.work_path, compact, sample_name,
                                   row_lib_method, data_type)
            if sample:
                yield sample