Exemple #1
0
 def statAll(self):
     """Collect per-sample read statistics and write them to reads_stat.xls.

     Steps:
       1. Open ``<self.path['QC']>/reads_stat.xls`` and write the header row.
       2. Start one thread per sample in ``self.sample_struct``, each running
          ``self.stat_raw_reads(compact, data_type, lib_method, sample)``.
          After each start, wait until ``threading.active_count()`` drops
          below ``self.concurrency``.
       3. Join every still-alive thread registered in the active-thread set.
       4. For every row of ``<self.path['split']>/sam_barcode.all`` write one
          line of statistics, or a short ``None`` line when the sample is
          absent from ``self.sample_struct``.
     """
     import time  # local import: only needed for the throttle back-off

     with open('%s/reads_stat.xls' % self.path['QC'], 'w') as out:
         out.write('compact\tsample_name\tdata_type\tlib_method\traw_reads\tpandaseq_reads\tHQ_reads\tHQ_ratio\tTotal_ratio\tneeded_reads\tneed_to_reseq\n')

         for compact, data_type_hash in self.sample_struct.items():
             for data_type, lib_method_hash in data_type_hash.items():
                 for lib_method, sampleinfo in lib_method_hash.items():
                     for sample in sampleinfo:
                         t = threading.Thread(
                             target=self.stat_raw_reads,
                             args=(compact, data_type, lib_method, sample),
                         )
                         self.__active_threads.add(t)
                         t.start()
                         # Throttle: sleep briefly instead of spinning so the
                         # waiting thread does not compete with the workers.
                         while threading.active_count() >= self.concurrency:
                             time.sleep(0.01)

         # Only threads that are still alive need joining; enumerate() never
         # returns finished or unstarted threads.
         for t in threading.enumerate():
             if t in self.__active_threads:
                 t.join()

         sort_sample_file = '%s/sam_barcode.all' % self.path['split']
         for (compact, sample_name, barcode_info, data_type, lib_method,
              needed_reads) in parse_sam_all(sort_sample_file):
             key_list = [compact, data_type, lib_method, sample_name]
             if not self.check_keys(key_list, self.sample_struct):
                 # Five placeholders for five values (the original had six
                 # placeholders and raised TypeError here).
                 out.write('%s\t%s\t%s\t%s\t%s\n' % (
                     compact, sample_name, data_type, lib_method, 'None'))
                 continue

             item = self.sample_struct[compact][data_type][lib_method][sample_name]
             raw_reads = int(item['raw_reads'])
             hq_reads = int(item['HQ_reads'])
             if raw_reads != 0:
                 # Force true division; int / int floors under Python 2.
                 t_ratio = float(hq_reads) / raw_reads * 100
             else:
                 t_ratio = 0

             # NOTE(review): str(MyList(...)) presumably renders the four
             # values as separate columns -- confirm against MyList.
             out_str = str(MyList((item['raw_reads'], item['pandaseq_reads'],
                                   item['HQ_reads'], item['HQ_ratio'])))
             need_to_reseq = int(needed_reads) - hq_reads
             out.write('%s\t%s\t%s\t%s\t%s\t%2.2f%%\t%s\t%s\n' % (
                 compact, sample_name, data_type, lib_method, out_str,
                 t_ratio, needed_reads, need_to_reseq))
Exemple #2
0
    def get_info(self):
        """Load per-compact data types and per-sample needed reads.

        Reads ``<self.path['split']>/sam_barcode.all`` through
        ``parse_sam_all`` and (re)builds two attributes:

        * ``self.compact_data_type`` -- maps each compact to the data_type
          of the first row seen for it.
        * ``self.needed_reads`` -- maps compact -> sample_name ->
          ``int(data_needed)``.

        A compact that appears with a second, different data_type only
        triggers a warning on stderr; the first data_type is kept.
        """
        self.compact_data_type = {}
        self.needed_reads = {}
        sam_barcode_file = '%s/sam_barcode.all' % self.path['split']
        for (compact, sample_name, barcode_info, data_type, lib_method,
             data_needed) in parse_sam_all(sam_barcode_file):
            if compact not in self.compact_data_type:
                self.compact_data_type[compact] = data_type
                self.needed_reads[compact] = {}
            elif self.compact_data_type[compact] != data_type:
                # Newline-terminated so consecutive warnings stay on
                # separate lines.
                stderr.write('The compact %s has two diffrent data_type!\n' % compact)

            self.needed_reads[compact][sample_name] = int(data_needed)
Exemple #3
0
    def getSampleStruct(self):
        """Rebuild ``self.sample_struct`` from the sam_barcode.all table.

        The result is a nested mapping
        ``compact -> data_type -> lib_method -> sample_name -> counters``
        where every counter starts at 0, except ``needed_reads``, which is
        taken from the table row. Only the first row seen for a sample
        creates its record. Every lib_method encountered also gets a 0
        entry in ``self.total_reads`` if it does not have one yet.
        """
        self.sample_struct = {}
        barcode_table = '%s/sam_barcode.all' % self.path['split']
        for row in parse_sam_all(barcode_table):
            (compact, sample_name, barcode_info,
             data_type, lib_method, needed_reads) = row

            # Walk down the hierarchy, creating each missing level.
            samples = (self.sample_struct
                       .setdefault(compact, {})
                       .setdefault(data_type, {})
                       .setdefault(lib_method, {}))

            # Convert needed_reads only for a sample's first row.
            if sample_name not in samples:
                samples[sample_name] = {
                    'pandaseq_reads': 0,
                    'HQ_reads': 0,
                    'HQ_ratio': 0,
                    'raw_reads': 0,
                    'needed_reads': int(needed_reads),
                }

            if lib_method not in self.total_reads:
                self.total_reads[lib_method] = 0
Exemple #4
0
 def _create_samples(self):
     """Yield a WorkPerSample for each row of each usable barcode file.

     A file in ``self.sam_barcode_files`` is skipped when
     ``get_lib_method(file)`` returns None. For the remaining files every
     row from ``parse_sam_all`` is turned into
     ``WorkPerSample(self.work_path, compact, sample_name, lib_method,
     data_type)``; falsy samples are dropped.
     """
     for sam_barcode_file in self.sam_barcode_files:
         # The file-level lib_method only decides whether to process the
         # file; each sample uses the lib_method from its own row.
         if get_lib_method(sam_barcode_file) is None:
             continue
         sys.stdout.write('sam_barcode_file: %s          ... ok\n' % sam_barcode_file)
         for (compact, sample_name, barcode_info, data_type, lib_method,
              needed_data) in parse_sam_all(sam_barcode_file):
             sample = WorkPerSample(self.work_path, compact, sample_name,
                                    lib_method, data_type)
             if not sample:
                 continue
             yield sample