def check_run_status(self):
    """Check the status of a run while it is in progress.

    In the case of HiSeq, check that all sub-demultiplexing jobs are done
    and, if so, aggregate their results.

    Nothing is done when ``self.get_run_status()`` already reports
    ``'COMPLETED'``. Otherwise one sub-demultiplexing job is expected per
    samplesheet matching ``*_[0-9].csv`` in the run directory; a job counts
    as finished when ``Demultiplexing_<id>/Stats/DemultiplexingStats.xml``
    exists. Once every job is finished the results are aggregated, the
    RunParser is initialised and ``_rename_undet`` is called for every
    unpooled lane.

    Returns:
        None in every case.
    """
    run_dir = self.run_dir
    # All demux jobs already finished and their stats were aggregated.
    if self.get_run_status() == 'COMPLETED':
        return None

    # Collect all samplesheets generated before. The pattern only matches a
    # single-digit job id; this hypothesis should hold for a while.
    samplesheets = glob.glob(os.path.join(run_dir, "*_[0-9].csv"))

    # NOTE(review): with no matching samplesheet this stays True and the
    # aggregation below still runs, exactly as before - confirm intended.
    all_demux_done = True
    for samplesheet in samplesheets:
        # The job id is the token after the LAST underscore of the file
        # stem, so samplesheet names containing extra underscores still work.
        sheet_stem = os.path.splitext(os.path.basename(samplesheet))[0]
        demux_id = sheet_stem.rsplit("_", 1)[1]
        demux_folder = os.path.join(run_dir,
                                    "Demultiplexing_{}".format(demux_id))
        # demux_folder already starts with run_dir; joining run_dir again
        # would duplicate the prefix whenever run_dir is a relative path.
        stats_file = os.path.join(demux_folder, 'Stats',
                                  'DemultiplexingStats.xml')
        if os.path.exists(stats_file):
            logger.info(
                "Sub-Demultiplexing in {} completed.".format(demux_folder))
        else:
            all_demux_done = False
            logger.info(
                "Sub-Demultiplexing in {} not completed yet.".format(
                    demux_folder))

    if not all_demux_done:
        return None

    # Aggregate all the results in the Demultiplexing folder.
    self._aggregate_demux_results()
    # Only now can the RunParser be initialised.
    self.runParserObj = RunParser(self.run_dir)
    # Rename undetermined files where needed (unpooled lanes only).
    lanes = misc.return_unique(
        [row['Lane'] for row in self.runParserObj.samplesheet.data])
    samples_per_lane = self.get_samples_per_lane()
    for lane in lanes:
        if self.is_unpooled_lane(lane):
            self._rename_undet(lane, samples_per_lane)
    return None
def check_QC(self):
    """Run the per-lane QC checks for the flowcell.

    Thresholds are read from ``self.CONFIG['QC']``. For every lane listed in
    the samplesheet the yield, the %Q30 and the undetermined-index checks are
    run; the frequency of the most represented undetermined index is only
    checked when the undetermined fraction check passed. For unpooled lanes
    the undetermined files are renamed first (but not linked to the sample)
    and the unpooled thresholds are used.

    Returns:
        bool: True when every lane passed all checks, False when at least
        one lane failed or the demultiplexing results could not be parsed.

    Raises:
        KeyError: if one of the QC thresholds is missing from the
            configuration.
    """
    qc_config = self.CONFIG['QC']
    max_percentage_undetermined_indexes_pooled_lane = qc_config[
        'max_percentage_undetermined_indexes_pooled_lane']
    max_percentage_undetermined_indexes_unpooled_lane = qc_config[
        'max_percentage_undetermined_indexes_unpooled_lane']
    minimum_percentage_Q30_bases_per_lane = qc_config[
        'minimum_percentage_Q30_bases_per_lane']
    minimum_yield_per_lane = qc_config['minimum_yield_per_lane']
    max_frequency_most_represented_und_index_pooled_lane = qc_config[
        'max_frequency_most_represented_und_index_pooled_lane']
    max_frequency_most_represented_und_index_unpooled_lane = qc_config[
        'max_frequency_most_represented_und_index_unpooled_lane']

    parser = self.runParserObj
    if not parser.samplesheet or not parser.lanebarcodes or not parser.lanes:
        logger.error(
            "Something went wrong while parsing demultiplex results. QC cannot be performed."
        )
        return False

    status = True  # the flowcell passes until a lane fails
    # Read the samplesheet and fetch all lanes.
    lanes_to_qc = misc.return_unique(
        [row['Lane'] for row in parser.samplesheet.data])
    # NOTE(review): the result is not used in this function; the call is
    # kept in case it has side effects - confirm and remove if it is pure.
    self.get_path_per_lane()
    samples_per_lane = self.get_samples_per_lane()

    for lane in lanes_to_qc:
        lane_status = True

        # QC lane yield.
        if not self.lane_check_yield(lane, minimum_yield_per_lane):
            # Logger.warn is a deprecated alias (removed in Python 3.13).
            logger.warning(
                "lane {} did not pass yield qc check. This FC will not be transferred."
                .format(lane))
            lane_status = False

        # QC on the total %>Q30 of the whole lane.
        if not self.lane_check_Q30(lane,
                                   minimum_percentage_Q30_bases_per_lane):
            logger.warning(
                "lane {} did not pass Q30 qc check. This FC will not be transferred."
                .format(lane))
            lane_status = False

        # QC for undetermined: thresholds differ for pooled/unpooled lanes.
        if self.is_unpooled_lane(lane):
            # Rename undetermined so that PIPER is able to use them; they
            # are deliberately NOT soft-linked to the sample by default.
            self._rename_undet(lane, samples_per_lane)
            max_percentage_undetermined_indexes = max_percentage_undetermined_indexes_unpooled_lane
            max_frequency_most_represented_und = max_frequency_most_represented_und_index_unpooled_lane
        else:
            max_percentage_undetermined_indexes = max_percentage_undetermined_indexes_pooled_lane
            max_frequency_most_represented_und = max_frequency_most_represented_und_index_pooled_lane

        if not self.check_undetermined_reads(
                lane, max_percentage_undetermined_indexes):
            logger.warning(
                "lane {} did not pass the undetermiend qc checks. Fraction of undetermined too large."
                .format(lane))
            lane_status = False
        elif not self.check_maximum_undertemined_freq(
                lane, max_frequency_most_represented_und):
            logger.warning(
                "lane {} did not pass the check for most represented undet index. Most occurint undet index occurs too ofetn."
                .format(lane))
            lane_status = False

        if lane_status:
            logger.info("lane {} passed all qc checks".format(lane))
        # Store the status for the whole FC.
        status = status and lane_status
    return status
def check_QC(self):
    """Run the per-lane QC checks for the flowcell.

    Thresholds are read from ``self.CONFIG['QC']``. For every lane listed in
    the samplesheet the yield, the %Q30 and the undetermined-index checks are
    run; the frequency of the most represented undetermined index is only
    checked when the undetermined fraction check passed. For unpooled lanes
    the undetermined files are renamed and linked to the sample first, and
    the unpooled thresholds are used.

    Returns:
        bool: True when every lane passed all checks, False when at least
        one lane failed or the demultiplexing results could not be parsed.

    Raises:
        KeyError: if one of the QC thresholds is missing from the
            configuration.
    """
    # TODO rewrite this using Illumina computed Undetermined files
    run_dir = self.run_dir
    dmux_folder = self.demux_dir
    qc_config = self.CONFIG['QC']
    max_percentage_undetermined_indexes_pooled_lane = qc_config[
        'max_percentage_undetermined_indexes_pooled_lane']
    max_percentage_undetermined_indexes_unpooled_lane = qc_config[
        'max_percentage_undetermined_indexes_unpooled_lane']
    minimum_percentage_Q30_bases_per_lane = qc_config[
        'minimum_percentage_Q30_bases_per_lane']
    minimum_yield_per_lane = qc_config['minimum_yield_per_lane']
    max_frequency_most_represented_und_index_pooled_lane = qc_config[
        'max_frequency_most_represented_und_index_pooled_lane']
    max_frequency_most_represented_und_index_unpooled_lane = qc_config[
        'max_frequency_most_represented_und_index_unpooled_lane']

    parser = self.runParserObj
    if not parser.samplesheet or not parser.lanebarcodes or not parser.lanes:
        logger.error("Something went wrong while parsing demultiplex results. QC cannot be performed.")
        return False

    status = True  # the flowcell passes until a lane fails
    # Read the samplesheet and fetch all lanes.
    lanes_to_qc = misc.return_unique(
        [row['Lane'] for row in parser.samplesheet.data])
    path_per_lane = self.get_path_per_lane()
    samples_per_lane = self.get_samples_per_lane()

    for lane in lanes_to_qc:
        lane_status = True

        # QC lane yield.
        if not self.lane_check_yield(lane, minimum_yield_per_lane):
            # Logger.warn is a deprecated alias (removed in Python 3.13).
            logger.warning("lane {} did not pass yield qc check. This FC will not be transferred.".format(lane))
            lane_status = False

        # QC on the total %>Q30 of the whole lane.
        if not self.lane_check_Q30(lane,
                                   minimum_percentage_Q30_bases_per_lane):
            logger.warning("lane {} did not pass Q30 qc check. This FC will not be transferred.".format(lane))
            lane_status = False

        # QC for undetermined: thresholds differ for pooled/unpooled lanes.
        if self.is_unpooled_lane(lane):
            # Rename undetermined so that PIPER is able to use them.
            self._rename_undet(lane, samples_per_lane)
            max_percentage_undetermined_indexes = max_percentage_undetermined_indexes_unpooled_lane
            max_frequency_most_represented_und = max_frequency_most_represented_und_index_unpooled_lane
            logger.info("linking undetermined lane {} to sample".format(lane))
            misc.link_undet_to_sample(run_dir, dmux_folder, lane,
                                      path_per_lane)
        else:
            max_percentage_undetermined_indexes = max_percentage_undetermined_indexes_pooled_lane
            max_frequency_most_represented_und = max_frequency_most_represented_und_index_pooled_lane

        if not self.check_undetermined_reads(
                lane, max_percentage_undetermined_indexes):
            logger.warning("lane {} did not pass the undetermiend qc checks. Fraction of undetermined too large.".format(lane))
            lane_status = False
        elif not self.check_maximum_undertemined_freq(
                lane, max_frequency_most_represented_und):
            logger.warning("lane {} did not pass the check for most represented undet index. Most occurint undet index occurs too ofetn.".format(lane))
            lane_status = False

        if lane_status:
            logger.info("lane {} passed all qc checks".format(lane))
        # Store the status for the whole FC.
        status = status and lane_status
    return status
def test_return_unique(self):
    """misc.return_unique should give back each item of a list only once."""
    with_duplicates = ['a', 'b', 'a', 'c']
    deduplicated = misc.return_unique(with_duplicates)
    # The repeated 'a' is expected to appear a single time in the result.
    self.assertEqual(deduplicated, ['a', 'b', 'c'])