Exemple #1
0
def get_runObj(run):
    """ Tries to read runParameters.xml to parse the type of sequencer
        and then return the respective Run object (MiSeq, HiSeq..)

        The run folder is searched for ``runParameters.xml`` first and
        ``RunParameters.xml`` second. The instrument description is taken
        from ``Setup/Flowcell`` when a ``Setup`` section exists (falling back
        to ``Setup/ApplicationName``), otherwise from the top-level
        ``Application`` or ``ApplicationName`` entries.

        :param run: run name identifier (used as the path of the run folder)
        :type run: string
        :rtype: Object
        :returns: returns the sequencer type object,
        None if the parameters file is missing or cannot be parsed, or if
        the sequencer type is unknown
    """
    # Pick the first spelling of the parameters file that exists on disk.
    for run_parameters_file in ("runParameters.xml", "RunParameters.xml"):
        rppath = os.path.join(run, run_parameters_file)
        if os.path.exists(rppath):
            break
    else:
        logger.error("Cannot find RunParameters.xml or runParameters.xml in the run folder for run {}".format(run))
        return None

    try:
        rp = RunParametersParser(rppath)
    except OSError:
        logger.warning("Problems parsing the runParameters.xml file at {}. "
                       "This is quite unexpected. please archive the run {} manually".format(rppath, run))
        return None

    # Do a case by case test because there are so many versions of
    # RunParameters that there is no real other way.
    run_parameters = rp.data['RunParameters']
    runtype = run_parameters.get("Application",
                                 run_parameters.get("ApplicationName", ""))
    if "Setup" in run_parameters:
        # This is the HiSeq2500, MiSeq, and HiSeqX case
        try:
            # Works for recent control software
            runtype = run_parameters["Setup"]["Flowcell"]
        except KeyError:
            # Use this as second resource but print a warning in the logs
            logger.warning("Parsing runParameters to fecth instrument type, "
                           "not found Flowcell information in it. Using ApplicationName")
            # Default to "" so the substring tests below do not raise when
            # ApplicationName is missing as well.
            runtype = run_parameters["Setup"].get("ApplicationName", "")

    # A key that is present but holds None would make the `in` tests below
    # raise TypeError; treat it like an unknown instrument instead.
    runtype = runtype or ""

    # "HiSeq X" has to be tested before the more general "HiSeq".
    if "HiSeq X" in runtype:
        return HiSeqX_Run(run, CONFIG["analysis"]["HiSeqX"])
    elif "HiSeq" in runtype or "TruSeq" in runtype:
        return HiSeq_Run(run, CONFIG["analysis"]["HiSeq"])
    elif "MiSeq" in runtype:
        return MiSeq_Run(run, CONFIG["analysis"]["MiSeq"])
    elif "NextSeq" in runtype:
        return NextSeq_Run(run, CONFIG["analysis"]["NextSeq"])
    elif "NovaSeq" in runtype:
        return NovaSeq_Run(run, CONFIG["analysis"]["NovaSeq"])

    logger.warning("Unrecognized run type {}, cannot archive the run {}. "
                   "Someone as likely bought a new sequencer without telling "
                   "it to the bioinfo team".format(runtype, run))
    # Not necessary as the function will return None at this point but
    # just for being explicit
    return None
    def setUpClass(self):
        """ Builds the on-disk fixture and the run objects used by the tests.

        The following tree is created under a fresh temporary directory:

        tmp/
        |__ transfer.tsv  (one row: the COMPLETED run name and a timestamp)
        |__ 141124_ST-COMPLETED1_01_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |   |__ Demultiplexing
        |   |   |__ Stats
        |   |       |__ DemultiplexingStats.xml
        |   |__ RTAComplete.txt
        |__ 141124_ST-INPROGRESS1_02_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |   |__ Demultiplexing
        |   |__ RTAComplete.txt
        |__ 141124_ST-RUNNING1_03_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |__ 141124_ST-TOSTART1_04_FCIDXXX
            |__ RunInfo.xml
            |__ runParameters.xml
            |__ RTAComplete.txt

        The two XML samples are copied from the relative ``data/`` folder,
        so the current working directory must contain it.
        """
        base_dir = os.path.join(tempfile.mkdtemp(), 'tmp')
        self.tmp_dir = base_dir
        self.transfer_file = os.path.join(base_dir, 'transfer.tsv')

        running_dir = os.path.join(base_dir, '141124_ST-RUNNING1_03_AFCIDXX')
        to_start_dir = os.path.join(base_dir, '141124_ST-TOSTART1_04_FCIDXXX')
        in_progress_dir = os.path.join(base_dir, '141124_ST-INPROGRESS1_02_AFCIDXX')
        completed_dir = os.path.join(base_dir, '141124_ST-COMPLETED1_01_AFCIDXX')
        stats_dir = os.path.join(completed_dir, 'Demultiplexing', 'Stats')

        # Directory skeleton, parents before children
        for folder in (base_dir,
                       running_dir,
                       to_start_dir,
                       os.path.join(in_progress_dir, 'Demultiplexing'),
                       stats_dir):
            os.makedirs(folder)

        # An empty RTAComplete.txt marks a run whose sequencing is finished
        for finished_dir in (to_start_dir, in_progress_dir, completed_dir):
            with open(os.path.join(finished_dir, 'RTAComplete.txt'), 'w'):
                pass

        # An empty DemultiplexingStats.xml marks the preprocessing as done
        with open(os.path.join(stats_dir, 'DemultiplexingStats.xml'), 'w'):
            pass

        # Record the completed run in the transfer file
        with open(self.transfer_file, 'w') as handle:
            row = [os.path.basename(completed_dir), str(datetime.now())]
            csv.writer(handle, delimiter='\t').writerow(row)

        # Every run folder receives the sample RunInfo.xml and runParameters.xml
        for run_dir in (running_dir, to_start_dir, in_progress_dir, completed_dir):
            for sample in ('data/RunInfo.xml', 'data/runParameters.xml'):
                shutil.copy(sample, run_dir)

        # Run objects under test
        # Jose : add tests for other sequencers
        hiseqx_config = CONFIG["analysis"]["HiSeqX"]
        self.running = HiSeqX_Run(running_dir, hiseqx_config)
        self.to_start = Run(to_start_dir, hiseqx_config)
        self.in_progress = Run(in_progress_dir, hiseqx_config)
        self.completed = Run(completed_dir, hiseqx_config)
        self.finished_runs = [self.to_start, self.in_progress, self.completed]
class TestTracker(unittest.TestCase):
    """ analysis.py script tests
    """
    @classmethod
    def setUpClass(cls):
        """ Creates the following directory tree for testing purposes:

        tmp/
        |__ transfer.tsv
        |__ 141124_ST-COMPLETED1_01_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |   |__ Demultiplexing
        |   |   |__ Stats
        |   |       |__ DemultiplexingStats.xml
        |   |__ RTAComplete.txt
        |__ 141124_ST-INPROGRESS1_02_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |   |__ Demultiplexing
        |   |__ RTAComplete.txt
        |__ 141124_ST-RUNNING1_03_AFCIDXX
        |   |__ RunInfo.xml
        |   |__ runParameters.xml
        |__ 141124_ST-TOSTART1_04_FCIDXXX
            |__ RunInfo.xml
            |__ runParameters.xml
            |__ RTAComplete.txt

        The sample XML files are copied from the relative ``data/`` folder,
        so the tests must be started from the directory that contains it.
        """
        # Remember the directory mkdtemp() creates so that tearDownClass can
        # remove it as well, not only the 'tmp' folder inside it.
        cls._tmp_root = tempfile.mkdtemp()
        cls.tmp_dir = os.path.join(cls._tmp_root, 'tmp')
        cls.transfer_file = os.path.join(cls.tmp_dir, 'transfer.tsv')

        running = os.path.join(cls.tmp_dir, '141124_ST-RUNNING1_03_AFCIDXX')
        to_start = os.path.join(cls.tmp_dir, '141124_ST-TOSTART1_04_FCIDXXX')
        in_progress = os.path.join(cls.tmp_dir, '141124_ST-INPROGRESS1_02_AFCIDXX')
        completed = os.path.join(cls.tmp_dir, '141124_ST-COMPLETED1_01_AFCIDXX')
        finished_runs = [to_start, in_progress, completed]

        # Create runs directory structure
        os.makedirs(cls.tmp_dir)
        os.makedirs(running)
        os.makedirs(to_start)
        os.makedirs(os.path.join(in_progress, 'Demultiplexing'))
        os.makedirs(os.path.join(completed, 'Demultiplexing', 'Stats'))

        # Create files indicating that the run is finished
        for run in finished_runs:
            open(os.path.join(run, 'RTAComplete.txt'), 'w').close()

        # Create files indicating that the preprocessing is done
        open(os.path.join(completed, 'Demultiplexing', 'Stats', 'DemultiplexingStats.xml'), 'w').close()

        # Create transfer file and add the completed run
        with open(cls.transfer_file, 'w') as f:
            tsv_writer = csv.writer(f, delimiter='\t')
            tsv_writer.writerow([os.path.basename(completed), str(datetime.now())])

        # Copy sample RunInfo.xml and runParameters.xml to every run directory
        for run in [running, to_start, in_progress, completed]:
            shutil.copy('data/RunInfo.xml', run)
            shutil.copy('data/runParameters.xml', run)

        # Create run objects
        # TODO(Jose): add tests for other sequencers
        cls.running = HiSeqX_Run(running, CONFIG["analysis"]["HiSeqX"])
        cls.to_start = Run(to_start, CONFIG["analysis"]["HiSeqX"])
        cls.in_progress = Run(in_progress, CONFIG["analysis"]["HiSeqX"])
        cls.completed = Run(completed, CONFIG["analysis"]["HiSeqX"])
        cls.finished_runs = [cls.to_start, cls.in_progress, cls.completed]

    @classmethod
    def tearDownClass(cls):
        """ Removes the whole temporary tree created by setUpClass
        """
        shutil.rmtree(cls._tmp_root)

    def test_1_is_finished(self):
        """ Is finished should be True only if "RTAComplete.txt" file is present...
        """
        self.assertFalse(self.running._is_sequencing_done())
        # Call the method: the bound method object itself is always truthy,
        # which would make this assertion pass unconditionally.
        self.assertTrue(all(run._is_sequencing_done() for run in self.finished_runs))

    def test_2_processing_status(self):
        """ Status of the processing depends on the generated files
        """
        self.assertEqual('SEQUENCING', self.running.get_run_status())
        self.assertEqual('TO_START', self.to_start.get_run_status())
        self.assertEqual('IN_PROGRESS', self.in_progress.get_run_status())
        self.assertEqual('COMPLETED', self.completed.get_run_status())

    def test_3_is_transferred(self):
        """ is_transferred should rely on the info in transfer.tsv
        """
        self.assertTrue(self.completed.is_transferred(self.transfer_file))
        self.assertFalse(self.running.is_transferred(self.transfer_file))
        self.assertFalse(self.to_start.is_transferred(self.transfer_file))
        self.assertFalse(self.in_progress.is_transferred(self.transfer_file))
Exemple #4
0
def get_runObj(run):
    """Tries to read runParameters.xml to parse the type of sequencer
        and then return the respective Run object (MiSeq, HiSeq..)

    The run folder is searched for ``runParameters.xml`` first and
    ``RunParameters.xml`` second. The instrument description is taken from
    ``Setup/Flowcell`` when a ``Setup`` section exists (falling back to
    ``Setup/ApplicationName``), otherwise from the top-level ``Application``
    or ``ApplicationName`` entries.

    :param run: run name identifier (used as the path of the run folder)
    :type run: string
    :rtype: Object
    :returns: returns the sequencer type object,
    None if the parameters file is missing or cannot be parsed, or if the
    sequencer type is unknown
    """
    # Pick the first spelling of the parameters file that exists on disk.
    for run_parameters_file in ('runParameters.xml', 'RunParameters.xml'):
        rppath = os.path.join(run, run_parameters_file)
        if os.path.exists(rppath):
            break
    else:
        logger.error(
            'Cannot find RunParameters.xml or runParameters.xml in the run folder for run {}'
            .format(run))
        return None

    try:
        rp = RunParametersParser(rppath)
    except OSError:
        logger.warning(
            'Problems parsing the runParameters.xml file at {}. '
            'This is quite unexpected. please archive the run {} manually'.
            format(rppath, run))
        return None

    # Do a case by case test because there are so many versions of
    # RunParameters that there is no real other way.
    run_parameters = rp.data['RunParameters']
    runtype = run_parameters.get(
        'Application', run_parameters.get('ApplicationName', ''))
    if 'Setup' in run_parameters:
        # This is the HiSeq2500, MiSeq, and HiSeqX case
        try:
            # Works for recent control software
            runtype = run_parameters['Setup']['Flowcell']
        except KeyError:
            # Use this as second resource but print a warning in the logs
            logger.warning(
                'Parsing runParameters to fecth instrument type, '
                'not found Flowcell information in it. Using ApplicationName'
            )
            # Default to '' so the substring tests below do not raise when
            # ApplicationName is missing as well.
            runtype = run_parameters['Setup'].get('ApplicationName', '')

    # A key that is present but holds None would make the `in` tests below
    # raise TypeError; treat it like an unknown instrument instead.
    runtype = runtype or ''

    # 'HiSeq X' has to be tested before the more general 'HiSeq'.
    if 'HiSeq X' in runtype:
        return HiSeqX_Run(run, CONFIG['analysis']['HiSeqX'])
    elif 'HiSeq' in runtype or 'TruSeq' in runtype:
        return HiSeq_Run(run, CONFIG['analysis']['HiSeq'])
    elif 'MiSeq' in runtype:
        return MiSeq_Run(run, CONFIG['analysis']['MiSeq'])
    elif 'NextSeq' in runtype:
        return NextSeq_Run(run, CONFIG['analysis']['NextSeq'])
    elif 'NovaSeq' in runtype:
        return NovaSeq_Run(run, CONFIG['analysis']['NovaSeq'])

    logger.warning(
        'Unrecognized run type {}, cannot archive the run {}. '
        'Someone as likely bought a new sequencer without telling '
        'it to the bioinfo team'.format(runtype, run))
    return None