コード例 #1
0
ファイル: Runs.py プロジェクト: Hammarn/TACA
 def check_run_status(self):
     """Check a run in progress and aggregate its results once demux is done.

     Nothing is done when the run status is already 'COMPLETED'. Otherwise
     every samplesheet matching ``*_<digit>.csv`` in the run directory is
     taken as one sub-demultiplexing job, and the job counts as finished
     when ``Demultiplexing_<digit>/Stats/DemultiplexingStats.xml`` exists.
     When all jobs are finished the results are aggregated, the RunParser
     is initialised and ``_rename_undet`` is called for each unpooled lane.

     Returns:
         None in every case; the method works through side effects only.
     """
     run_dir = self.run_dir
     # A completed run has already been aggregated: nothing left to do.
     if self.get_run_status() == 'COMPLETED':
         return None
     # One samplesheet per sub-demultiplexing job. The pattern only matches
     # a single trailing digit, so job ids are limited to 0-9.
     samplesheets = glob.glob(os.path.join(run_dir, "*_[0-9].csv"))
     # NOTE(review): when no samplesheet is found the loop below is skipped
     # and aggregation still runs -- confirm this is intended.
     all_demux_done = True
     for samplesheet in samplesheets:
         # The job id is the token after the LAST underscore: the glob
         # guarantees that token is the digit, even when the file name
         # contains other underscores.
         sheet_name = os.path.splitext(os.path.basename(samplesheet))[0]
         demux_id = sheet_name.rsplit("_", 1)[1]
         demux_folder = os.path.join(run_dir,
                                     "Demultiplexing_{}".format(demux_id))
         # demux_folder already starts with run_dir; joining run_dir again
         # would duplicate it whenever run_dir is a relative path.
         stats_file = os.path.join(demux_folder, 'Stats',
                                   'DemultiplexingStats.xml')
         if os.path.exists(stats_file):
             logger.info(
                 "Sub-Demultiplexing in {} completed.".format(demux_folder))
         else:
             all_demux_done = False
             logger.info(
                 "Sub-Demultiplexing in {} not completed yet.".format(
                     demux_folder))
     if not all_demux_done:
         return None
     # Every job is done: gather all results in the Demultiplexing folder.
     self._aggregate_demux_results()
     # The RunParser is only initialised after the results are aggregated.
     self.runParserObj = RunParser(self.run_dir)
     # Rename the undetermined files of the unpooled lanes, if any.
     lanes = misc.return_unique(
         [row['Lane'] for row in self.runParserObj.samplesheet.data])
     samples_per_lane = self.get_samples_per_lane()
     for lane in lanes:
         if self.is_unpooled_lane(lane):
             self._rename_undet(lane, samples_per_lane)
コード例 #2
0
    def check_QC(self):
        """Run the per-lane QC checks on the demultiplexing results.

        Every lane listed in the samplesheet is checked for yield, for the
        percentage of bases above Q30 and for undetermined indexes (first
        the overall fraction, then the frequency of the most represented
        undetermined index). Thresholds are read from ``self.CONFIG['QC']``;
        unpooled lanes use their own undetermined thresholds and have
        ``_rename_undet`` called on them. All checks are run for every lane,
        and each failed check is logged as a warning.

        Returns:
            bool: True if every lane passed every check. False if at least
            one check failed, or if the parsed samplesheet, lane barcodes
            or lanes are missing.
        """
        qc_config = self.CONFIG['QC']
        max_percentage_undet_pooled = qc_config[
            'max_percentage_undetermined_indexes_pooled_lane']
        max_percentage_undet_unpooled = qc_config[
            'max_percentage_undetermined_indexes_unpooled_lane']
        minimum_percentage_q30 = qc_config[
            'minimum_percentage_Q30_bases_per_lane']
        minimum_yield = qc_config['minimum_yield_per_lane']
        max_frequency_undet_pooled = qc_config[
            'max_frequency_most_represented_und_index_pooled_lane']
        max_frequency_undet_unpooled = qc_config[
            'max_frequency_most_represented_und_index_unpooled_lane']

        parser = self.runParserObj
        if not (parser.samplesheet and parser.lanebarcodes and parser.lanes):
            logger.error(
                "Something went wrong while parsing demultiplex results. QC cannot be performed."
            )
            return False

        # Read the samplesheet and fetch all lanes.
        lanes_to_qc = misc.return_unique(
            [row['Lane'] for row in parser.samplesheet.data])
        # NOTE(review): the returned paths are not used in this method; the
        # call is kept in case it has side effects -- confirm and drop it.
        self.get_path_per_lane()
        samples_per_lane = self.get_samples_per_lane()

        status = True  # the flowcell passes until a lane fails
        for lane in lanes_to_qc:
            lane_status = True
            # QC on the lane yield.
            if not self.lane_check_yield(lane, minimum_yield):
                logger.warning(
                    "lane {} did not pass yield qc check. This FC will not be transferred."
                    .format(lane))
                lane_status = False
            # QC on the total %>Q30 of the whole lane.
            if not self.lane_check_Q30(lane, minimum_percentage_q30):
                logger.warning(
                    "lane {} did not pass Q30 qc check. This FC will not be transferred."
                    .format(lane))
                lane_status = False
            # QC on undetermined indexes: pooled and unpooled lanes have
            # different thresholds.
            if self.is_unpooled_lane(lane):
                # Rename the undetermined files so that PIPER can use them.
                # They are deliberately not linked to the samples.
                self._rename_undet(lane, samples_per_lane)
                max_percentage_undet = max_percentage_undet_unpooled
                max_frequency_undet = max_frequency_undet_unpooled
            else:
                max_percentage_undet = max_percentage_undet_pooled
                max_frequency_undet = max_frequency_undet_pooled

            # The frequency check only runs when the fraction check passed.
            if not self.check_undetermined_reads(lane, max_percentage_undet):
                logger.warning(
                    "lane {} did not pass the undetermiend qc checks. Fraction of undetermined too large."
                    .format(lane))
                lane_status = False
            elif not self.check_maximum_undertemined_freq(
                    lane, max_frequency_undet):
                logger.warning(
                    "lane {} did not pass the check for most represented undet index. Most occurint undet index occurs too ofetn."
                    .format(lane))
                lane_status = False

            if lane_status:
                logger.info("lane {} passed all qc checks".format(lane))
            # Store the status for the whole flowcell.
            status = status and lane_status

        return status
コード例 #3
0
ファイル: HiSeqX_Runs.py プロジェクト: eriksjolund/TACA
    def check_QC(self):
        """Run the per-lane QC checks on the demultiplexing results.

        Every lane listed in the samplesheet is checked for yield, for the
        percentage of bases above Q30 and for undetermined indexes (first
        the overall fraction, then the frequency of the most represented
        undetermined index). Thresholds are read from ``self.CONFIG['QC']``.
        Unpooled lanes use their own undetermined thresholds; for them
        ``_rename_undet`` and ``misc.link_undet_to_sample`` are called
        before the undetermined checks. All checks are run for every lane,
        and each failed check is logged as a warning.

        Returns:
            bool: True if every lane passed every check. False if at least
            one check failed, or if the parsed samplesheet, lane barcodes
            or lanes are missing.
        """
        #TODO rewrite this using Illumina computed Undetermined files
        run_dir = self.run_dir
        dmux_folder = self.demux_dir

        qc_config = self.CONFIG['QC']
        max_percentage_undet_pooled = qc_config['max_percentage_undetermined_indexes_pooled_lane']
        max_percentage_undet_unpooled = qc_config['max_percentage_undetermined_indexes_unpooled_lane']
        minimum_percentage_q30 = qc_config['minimum_percentage_Q30_bases_per_lane']
        minimum_yield = qc_config['minimum_yield_per_lane']
        max_frequency_undet_pooled = qc_config['max_frequency_most_represented_und_index_pooled_lane']
        max_frequency_undet_unpooled = qc_config['max_frequency_most_represented_und_index_unpooled_lane']

        parser = self.runParserObj
        if not (parser.samplesheet and parser.lanebarcodes and parser.lanes):
            logger.error("Something went wrong while parsing demultiplex results. QC cannot be performed.")
            return False

        # Read the samplesheet and fetch all lanes.
        lanes_to_qc = misc.return_unique([row['Lane'] for row in parser.samplesheet.data])
        path_per_lane = self.get_path_per_lane()
        samples_per_lane = self.get_samples_per_lane()

        status = True  # the flowcell passes until a lane fails
        for lane in lanes_to_qc:
            lane_status = True
            # QC on the lane yield.
            if not self.lane_check_yield(lane, minimum_yield):
                logger.warning("lane {} did not pass yield qc check. This FC will not be transferred.".format(lane))
                lane_status = False
            # QC on the total %>Q30 of the whole lane.
            if not self.lane_check_Q30(lane, minimum_percentage_q30):
                logger.warning("lane {} did not pass Q30 qc check. This FC will not be transferred.".format(lane))
                lane_status = False
            # QC on undetermined indexes: pooled and unpooled lanes have
            # different thresholds.
            if self.is_unpooled_lane(lane):
                # Rename the undetermined files so that PIPER can use them,
                # then link them to the sample.
                self._rename_undet(lane, samples_per_lane)
                max_percentage_undet = max_percentage_undet_unpooled
                max_frequency_undet = max_frequency_undet_unpooled
                logger.info("linking undetermined lane {} to sample".format(lane))
                misc.link_undet_to_sample(run_dir, dmux_folder, lane, path_per_lane)
            else:
                max_percentage_undet = max_percentage_undet_pooled
                max_frequency_undet = max_frequency_undet_pooled

            # The frequency check only runs when the fraction check passed.
            if not self.check_undetermined_reads(lane, max_percentage_undet):
                logger.warning("lane {} did not pass the undetermiend qc checks. Fraction of undetermined too large.".format(lane))
                lane_status = False
            elif not self.check_maximum_undertemined_freq(lane, max_frequency_undet):
                logger.warning("lane {} did not pass the check for most represented undet index. Most occurint undet index occurs too ofetn.".format(lane))
                lane_status = False

            if lane_status:
                logger.info("lane {} passed all qc checks".format(lane))
            # Store the status for the whole flowcell.
            status = status and lane_status

        return status
コード例 #4
0
 def test_return_unique(self):
     """misc.return_unique drops repeated items, keeping first-seen order."""
     expected = ['a', 'b', 'c']
     actual = misc.return_unique(['a', 'b', 'a', 'c'])
     self.assertEqual(actual, expected)