Example #1
0
def process_first_read(*args, **kwargs):
    """Processing to be performed after the first read and the index reads
    have been sequenced.

    Positional arguments: the first two are the run directory (``dname``)
    and the configuration (``config``). All positional arguments are
    forwarded to ``_post_process_run`` with the run's "Unaligned" directory
    appended.

    Keyword arguments read here:
      casava -- nothing is done unless this is true.
      no_casava_processing -- when true, fastq generation and barcode
          extraction are skipped; post-processing still runs.
      fetch_msg, store_msg, backup_msg, push_data -- forwarded to
          ``_post_process_run``; each defaults to False.
    """
    dname, config = args[0:2]
    if not kwargs.get("casava", False):
        return

    # Post-processing always targets the "Unaligned" folder in the run directory
    post_process_dir = os.path.join(dname, "Unaligned")

    if kwargs.get("no_casava_processing", False):
        # The conversion step is skipped, so there is no list of generated
        # folders to iterate over. Post-process the existing Unaligned
        # folder once instead of failing on an unbound name.
        unaligned_dirs = [post_process_dir]
    else:
        # Do bcl -> fastq conversion and demultiplexing using Casava1.8+
        logger2.info("Generating fastq.gz files for read 1 of {:s}".format(dname))

        # Touch the indicator flag that processing of read1 has been started
        utils.touch_indicator_file(os.path.join(dname, "first_read_processing_started.txt"))
        unaligned_dirs = _generate_fastq_with_casava(dname, config, r1=True)
        logger2.info("Done generating fastq.gz files for read 1 of {:s}".format(dname))

        # Extract the top barcodes from the undemultiplexed fraction
        for unaligned_dir in unaligned_dirs:
            if config["program"].get("extract_barcodes", None):
                extract_top_undetermined_indexes(dname, unaligned_dir, config)

    loc_args = args + (post_process_dir,)
    post_process_opts = {"fetch_msg": kwargs.get("fetch_msg", False),
                         "process_msg": False,
                         "store_msg": kwargs.get("store_msg", False),
                         "backup_msg": kwargs.get("backup_msg", False),
                         "push_data": kwargs.get("push_data", False)}
    # One post-processing pass per generated folder, each on the same
    # Unaligned directory
    for _ in unaligned_dirs:
        _post_process_run(*loc_args, **post_process_opts)

    # Touch the indicator flag that processing of read1 has been completed
    utils.touch_indicator_file(os.path.join(dname, "first_read_processing_completed.txt"))
Example #2
0
 def test__do_second_read_processing(self):
     """Exercise the decision on whether second read processing should run"""
     root = self.rootdir
     self._runinfo(os.path.join(root, "RunInfo.xml"))

     # Upper-case READ2 checkpoint present
     gaii_checkpoint = os.path.join(root, "Basecalling_Netcopy_complete_READ2.txt")
     utils.touch_file(gaii_checkpoint)
     outcome = _do_second_read_processing(root)
     self.assertTrue(
         outcome,
         "Processing should be run when last read GAII checkpoint exists")

     # Checkpoint removed again: no read checkpoints left
     os.unlink(gaii_checkpoint)
     outcome = _do_second_read_processing(root)
     self.assertFalse(
         outcome,
         "Processing should not be run before any reads are finished")

     # Only the Read2 checkpoint present
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read2.txt"))
     outcome = _do_second_read_processing(root)
     self.assertFalse(
         outcome,
         "Processing should not be run before last read is finished")

     # Read3 checkpoint added as well
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read3.txt"))
     outcome = _do_second_read_processing(root)
     self.assertTrue(
         outcome,
         "Processing should be run when last read is finished")

     # Once the started flag exists, processing must not be triggered again
     utils.touch_indicator_file(os.path.join(root, "second_read_processing_started.txt"))
     outcome = _do_second_read_processing(root)
     self.assertFalse(
         outcome,
         "Processing should not be run when processing has started")
Example #3
0
 def test__is_started_second_read_processing(self):
     """The started flag for second read processing is detected"""
     root = self.rootdir
     started_flag = os.path.join(root, "second_read_processing_started.txt")

     # Without the flag file the step is not reported as started
     before = ifm._is_started_second_read_processing(root)
     self.assertFalse(before)

     # After touching the flag file it is
     utils.touch_indicator_file(started_flag)
     after = ifm._is_started_second_read_processing(root)
     self.assertTrue(after)
Example #4
0
def process_first_read(*args, **kwargs):
    """Run the read 1 steps once the first read and the index reads are done.

    The first two positional arguments are the run directory and the
    configuration. Nothing is done unless the ``casava`` keyword is true.
    All positional arguments, followed by the directory returned from the
    fastq generation, are handed on to ``_post_process_run``.
    """
    run_dir, cfg = args[0:2]
    if not kwargs.get("casava", False):
        return

    # bcl -> fastq conversion and demultiplexing with Casava1.8+
    logger2.info("Generating fastq.gz files for read 1 of {:s}".format(run_dir))

    # Flag that read 1 processing has begun
    started_flag = os.path.join(run_dir, "first_read_processing_started.txt")
    utils.touch_indicator_file(started_flag)
    unaligned_dir = _generate_fastq_with_casava(run_dir, cfg, r1=True)
    logger2.info("Done generating fastq.gz files for read 1 of {:s}".format(run_dir))

    # Optionally pull out the top barcodes of the undemultiplexed fraction
    if cfg["program"].get("extract_barcodes", None):
        extract_top_undetermined_indexes(run_dir, unaligned_dir, cfg)

    post_args = args + (unaligned_dir,)
    _post_process_run(*post_args,
                      fetch_msg=True,
                      process_msg=False,
                      store_msg=kwargs.get("store_msg", False),
                      backup_msg=False)

    # Flag that read 1 processing has finished
    completed_flag = os.path.join(run_dir, "first_read_processing_completed.txt")
    utils.touch_indicator_file(completed_flag)
Example #5
0
 def test__do_second_read_processing(self):
     """Exercise the decision on whether second read processing should run"""
     root = self.rootdir
     self._runinfo(os.path.join(root, "RunInfo.xml"))

     # Upper-case READ2 checkpoint present
     gaii_checkpoint = os.path.join(root, "Basecalling_Netcopy_complete_READ2.txt")
     utils.touch_file(gaii_checkpoint)
     outcome = ifm._do_second_read_processing(root)
     self.assertTrue(
         outcome,
         "Processing should be run when last read GAII checkpoint exists")

     # Checkpoint removed again: no read checkpoints left
     os.unlink(gaii_checkpoint)
     outcome = ifm._do_second_read_processing(root)
     self.assertFalse(
         outcome,
         "Processing should not be run before any reads are finished")

     # Only the Read2 checkpoint present
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read2.txt"))
     outcome = ifm._do_second_read_processing(root)
     self.assertFalse(
         outcome,
         "Processing should not be run before last read is finished")

     # Read3 checkpoint added as well
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read3.txt"))
     outcome = ifm._do_second_read_processing(root)
     self.assertTrue(
         outcome,
         "Processing should be run when last read is finished")

     # Once the started flag exists, processing must not be triggered again
     utils.touch_indicator_file(os.path.join(root, "second_read_processing_started.txt"))
     outcome = ifm._do_second_read_processing(root)
     self.assertFalse(
         outcome,
         "Processing should not be run when processing has started")
Example #6
0
 def test__is_initial_processing(self):
     """Initial processing is in progress only between its two flag files"""
     root = self.rootdir
     started_flag = os.path.join(root, "initial_processing_started.txt")
     completed_flag = os.path.join(root, "initial_processing_completed.txt")

     # Neither flag present
     in_progress = _is_initial_processing(root)
     self.assertFalse(
         in_progress,
         "No indicator files should not indicate processing in progress")

     # Started flag only
     utils.touch_indicator_file(started_flag)
     in_progress = _is_initial_processing(root)
     self.assertTrue(
         in_progress,
         "Started indicator file should indicate processing in progress")

     # Started and completed flags
     utils.touch_indicator_file(completed_flag)
     in_progress = _is_initial_processing(root)
     self.assertFalse(
         in_progress,
         "Completed indicator file should not indicate processing in progress")
 def test__is_processing_first_read(self):
     """First read processing is in progress only between its two flag files"""
     root = self.rootdir
     started_flag = os.path.join(root, "first_read_processing_started.txt")
     completed_flag = os.path.join(root, "first_read_processing_completed.txt")

     # Neither flag present
     in_progress = ifm._is_processing_first_read(root)
     self.assertFalse(
         in_progress,
         "No indicator files should not indicate processing in progress")

     # Started flag only
     utils.touch_indicator_file(started_flag)
     in_progress = ifm._is_processing_first_read(root)
     self.assertTrue(
         in_progress,
         "Started indicator file should indicate processing in progress")

     # Started and completed flags
     utils.touch_indicator_file(completed_flag)
     in_progress = ifm._is_processing_first_read(root)
     self.assertFalse(
         in_progress,
         "Completed indicator file should not indicate processing in progress")
Example #8
0
 def test__do_initial_processing(self):
     """Exercise the decision on whether initial processing should run"""
     root = self.rootdir
     first_base_report = os.path.join(root, "First_Base_Report.htm")
     started_flag = os.path.join(root, "initial_processing_started.txt")

     # Empty run folder
     should_run = _do_initial_processing(root)
     self.assertFalse(
         should_run,
         "Initial processing should not be run with missing indicator flags")

     # First base report present
     utils.touch_file(first_base_report)
     should_run = _do_initial_processing(root)
     self.assertTrue(
         should_run,
         "Initial processing should be run after first base report creation")

     # Started flag present too
     utils.touch_indicator_file(started_flag)
     should_run = _do_initial_processing(root)
     self.assertFalse(
         should_run,
         "Initial processing should not be run when processing has been started")

     # Started flag present, first base report removed
     os.unlink(first_base_report)
     should_run = _do_initial_processing(root)
     self.assertFalse(
         should_run,
         ("Initial processing should not be run when processing has been started "
          "and missing first base report"))
Example #9
0
 def test__is_initial_processing(self):
     """Initial processing is in progress only between its two flag files"""
     root = self.rootdir
     started_flag = os.path.join(root, "initial_processing_started.txt")
     completed_flag = os.path.join(root, "initial_processing_completed.txt")

     # Neither flag present
     in_progress = ifm._is_initial_processing(root)
     self.assertFalse(
         in_progress,
         "No indicator files should not indicate processing in progress")

     # Started flag only
     utils.touch_indicator_file(started_flag)
     in_progress = ifm._is_initial_processing(root)
     self.assertTrue(
         in_progress,
         "Started indicator file should indicate processing in progress")

     # Started and completed flags
     utils.touch_indicator_file(completed_flag)
     in_progress = ifm._is_initial_processing(root)
     self.assertFalse(
         in_progress,
         "Completed indicator file should not indicate processing in progress")
Example #10
0
def initial_processing(*args, **kwargs):
    """Run the steps that follow creation of the first base report.

    The first positional argument is the run directory. All positional
    arguments, followed by a trailing None, are handed on to
    ``_post_process_run``. Only the ``store_msg`` keyword is read here
    (default False).
    """
    run_dir = args[0]
    started_flag = os.path.join(run_dir, "initial_processing_started.txt")
    completed_flag = os.path.join(run_dir, "initial_processing_completed.txt")

    # Flag that initial processing has begun
    utils.touch_indicator_file(started_flag)

    # Upload the necessary files
    post_args = args + (None,)
    _post_process_run(*post_args,
                      fetch_msg=True,
                      process_msg=False,
                      store_msg=kwargs.get("store_msg", False),
                      backup_msg=False)

    # Flag that initial processing has finished
    utils.touch_indicator_file(completed_flag)
Example #11
0
def process_second_read(*args, **kwargs):
    """Processing to be performed after all reads have been sequenced.

    The first two positional arguments are the run directory (``dname``)
    and the configuration (``config``, used as a mapping).

    Keyword arguments read here:
      casava -- when true, the Casava conversion path is taken.
      no_casava_processing -- when true, the conversion, the merge of the
          demultiplexing results and the rsync transfer are all skipped.

    If the configuration has an ``mfs_dir`` entry, the run's "Unaligned"
    folder is rsynced to ``<mfs_dir>/<run folder name>``. A failing rsync
    is logged as an error and is not re-raised.
    """
    dname, config = args[0:2]
    logger2.info("The instrument has finished dumping on directory %s" % dname)

    utils.touch_indicator_file(os.path.join(dname, "second_read_processing_started.txt"))
    _update_reported(config["msg_db"], dname)
    fastq_dir = None

    # Do bcl -> fastq conversion and demultiplexing using Casava1.8+
    if kwargs.get("casava", False):
        if not kwargs.get("no_casava_processing", False):
            logger2.info("Generating fastq.gz files for {:s}".format(dname))
            _generate_fastq_with_casava(dname, config)
            # Merge demultiplexing results into a single Unaligned folder
            utils.merge_demux_results(dname)
            # Move the demultiplexing results
            if 'mfs_dir' in config:
                fc_id = os.path.basename(dname)
                cl = ["rsync",
                      "--checksum",
                      "--recursive",
                      "--links",
                      "-D",
                      "--partial",
                      "--progress",
                      "--prune-empty-dirs",
                      os.path.join(dname, 'Unaligned'),
                      os.path.join(config.get('mfs_dir'), fc_id)]
                logger2.info("Synching Unaligned folder to MooseFS for run {}".format(fc_id))
                # NOTE(review): os.getcwd() is an absolute path, so
                # os.path.join() drops the log_dir component and logdir ends
                # up being the current working directory. Confirm whether
                # os.path.basename(os.getcwd()) was intended.
                logdir = os.path.join(config.get('log_dir'), os.getcwd())
                rsync_out = os.path.join(logdir, "rsync_transfer.out")
                rsync_err = os.path.join(logdir, "rsync_transfer.err")

                # "rerr" rather than "re" so the name of the stdlib re module
                # is not shadowed inside this function
                with open(rsync_out, 'a') as ro, open(rsync_err, 'a') as rerr:
                    header = "-----------\n{}\n".format(" ".join(cl))
                    ro.write(header)
                    rerr.write(header)
                    # rsync writes straight to the file descriptors; flush
                    # first so the header lands ahead of its output
                    ro.flush()
                    rerr.flush()
                    try:
                        subprocess.check_call(cl, stdout=ro, stderr=rerr)
                    except subprocess.CalledProcessError:
                        logger2.error("rsync transfer of Unaligned results FAILED")
Example #12
0
def initial_processing(*args, **kwargs):
    """Initial processing to be performed after the first base report.

    The first two positional arguments are the run directory (``dname``)
    and the configuration (``config``).

    Touches the "initial_processing_started.txt" indicator file and, when
    ``samplesheet.run_has_samplesheet`` returns a samplesheet path, copies
    that file into the run directory under its own base name. An IOError
    raised by the copy is logged and not re-raised.
    """
    dname, config = args[0:2]
    # Touch the indicator flag that initial processing has been started
    utils.touch_indicator_file(os.path.join(dname, "initial_processing_started.txt"))

    # Copy the samplesheet to the run folder
    ss_file = samplesheet.run_has_samplesheet(dname, config)
    if ss_file:
        dst = os.path.join(dname, os.path.basename(ss_file))
        try:
            copyfile(ss_file, dst)
        except IOError as e:
            logger2.error("Error copying samplesheet {} from {} to {}: {}".format(
                os.path.basename(ss_file),
                os.path.dirname(ss_file),
                os.path.dirname(dst),
                e))
Example #13
0
 def test__do_first_read_processing(self):
     """Exercise the decision on whether first read processing should run"""
     root = self.rootdir
     self._runinfo(os.path.join(root, "RunInfo.xml"))

     # No read checkpoints at all
     should_run = ifm._do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run before first read is finished")

     # Read1 checkpoint only
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read1.txt"))
     should_run = ifm._do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run before last index read is finished")

     # Read2 checkpoint added, but initial processing started and not completed
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read2.txt"))
     utils.touch_indicator_file(os.path.join(root, "initial_processing_started.txt"))
     should_run = ifm._do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run when previous processing step is in progress")

     # Initial processing completed
     utils.touch_indicator_file(os.path.join(root, "initial_processing_completed.txt"))
     should_run = ifm._do_first_read_processing(root)
     self.assertTrue(
         should_run,
         "Processing should be run when last index read is finished")

     # First read processing already started
     utils.touch_indicator_file(os.path.join(root, "first_read_processing_started.txt"))
     should_run = ifm._do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run when processing has started")
Example #14
0
 def test__do_first_read_processing(self):
     """Exercise the decision on whether first read processing should run"""
     root = self.rootdir
     self._runinfo(os.path.join(root, "RunInfo.xml"))

     # No read checkpoints at all
     should_run = _do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run before first read is finished")

     # Read1 checkpoint only
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read1.txt"))
     should_run = _do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run before last index read is finished")

     # Read2 checkpoint added, but initial processing started and not completed
     utils.touch_file(os.path.join(root, "Basecalling_Netcopy_complete_Read2.txt"))
     utils.touch_indicator_file(os.path.join(root, "initial_processing_started.txt"))
     should_run = _do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run when previous processing step is in progress")

     # Initial processing completed
     utils.touch_indicator_file(os.path.join(root, "initial_processing_completed.txt"))
     should_run = _do_first_read_processing(root)
     self.assertTrue(
         should_run,
         "Processing should be run when last index read is finished")

     # First read processing already started
     utils.touch_indicator_file(os.path.join(root, "first_read_processing_started.txt"))
     should_run = _do_first_read_processing(root)
     self.assertFalse(
         should_run,
         "Processing should not be run when processing has started")
Example #15
0
 def test__do_initial_processing(self):
     """Exercise the decision on whether initial processing should run"""
     root = self.rootdir
     first_base_report = os.path.join(root, "First_Base_Report.htm")
     started_flag = os.path.join(root, "initial_processing_started.txt")

     # Empty run folder
     should_run = ifm._do_initial_processing(root)
     self.assertFalse(
         should_run,
         "Initial processing should not be run with missing indicator flags")

     # First base report present
     utils.touch_file(first_base_report)
     should_run = ifm._do_initial_processing(root)
     self.assertTrue(
         should_run,
         "Initial processing should be run after first base report creation")

     # Started flag present too
     utils.touch_indicator_file(started_flag)
     should_run = ifm._do_initial_processing(root)
     self.assertFalse(
         should_run,
         "Initial processing should not be run when processing has been started")

     # Started flag present, first base report removed
     os.unlink(first_base_report)
     should_run = ifm._do_initial_processing(root)
     self.assertFalse(
         should_run,
         ("Initial processing should not be run when processing has been started "
          "and missing first base report"))
Example #16
0
def process_second_read(*args, **kwargs):
    """Run the processing that follows sequencing of all reads.

    The first two positional arguments are the run directory and the
    configuration. With a true ``casava`` keyword the Casava fastq
    generation is used; otherwise the samplesheets are processed and the
    ``qseq``, ``fastq`` and ``remove_qseq`` keywords select the remaining
    steps. All positional arguments, followed by the fastq directory (None
    unless the non-Casava fastq step ran), are handed on to
    ``_post_process_run``.
    """
    run_dir, cfg = args[0:2]
    logger2.info("The instrument has finished dumping on directory %s" % run_dir)

    started_flag = os.path.join(run_dir, "second_read_processing_started.txt")
    utils.touch_indicator_file(started_flag)
    _update_reported(cfg["msg_db"], run_dir)
    fastq_dir = None

    if not kwargs.get("casava", False):
        _process_samplesheets(run_dir, cfg)

        make_qseq = kwargs.get("qseq", True)
        if make_qseq:
            logger2.info("Generating qseq files for {:s}".format(run_dir))
            _generate_qseq(get_qseq_dir(run_dir), cfg)

        make_fastq = kwargs.get("fastq", True)
        if make_fastq:
            logger2.info("Generating fastq files for {:s}".format(run_dir))
            fastq_dir = _generate_fastq(run_dir, cfg)
            if kwargs.get("remove_qseq", False):
                _clean_qseq(get_qseq_dir(run_dir), fastq_dir)
            _calculate_md5(fastq_dir)
    else:
        # bcl -> fastq conversion and demultiplexing with Casava1.8+
        logger2.info("Generating fastq.gz files for {:s}".format(run_dir))
        _generate_fastq_with_casava(run_dir, cfg)

    # Hand over to the post-processing step
    post_args = args + (fastq_dir,)
    _post_process_run(*post_args,
                      fetch_msg=kwargs.get("fetch_msg", True),
                      process_msg=kwargs.get("process_msg", True),
                      store_msg=kwargs.get("store_msg", True),
                      backup_msg=kwargs.get("backup_msg", False))

    # Record the run as reported again now that processing has gone through
    _update_reported(cfg["msg_db"], run_dir)
    completed_flag = os.path.join(run_dir, "second_read_processing_completed.txt")
    utils.touch_indicator_file(completed_flag)
Example #17
0
 def test__is_started_second_read_processing(self):
     """The started flag for second read processing is detected"""
     root = self.rootdir
     started_flag = os.path.join(root, "second_read_processing_started.txt")

     # Without the flag file the step is not reported as started
     before = _is_started_second_read_processing(root)
     self.assertFalse(before)

     # After touching the flag file it is
     utils.touch_indicator_file(started_flag)
     after = _is_started_second_read_processing(root)
     self.assertTrue(after)
Example #18
0
 def test__is_started_initial_processing(self):
     """The started flag for initial processing is detected"""
     root = self.rootdir
     started_flag = os.path.join(root, "initial_processing_started.txt")

     # Without the flag file the step is not reported as started
     before = _is_started_initial_processing(root)
     self.assertFalse(before)

     # After touching the flag file it is
     utils.touch_indicator_file(started_flag)
     after = _is_started_initial_processing(root)
     self.assertTrue(after)
Example #19
0
            hiseq_ssheet = os.path.join(dname,'{}.csv'.format(_get_flowcell_id(dname)))
            mrun.write_hiseq_samplesheet(hiseq_ssheet)
        # If the module wasn't loaded, there's nothing we can do, so warn
        else:
            logger2.error("The necessary dependencies for processing MiSeq runs with CASAVA could not be loaded")

    # Upload the necessary files
    loc_args = args + (None, )
    _post_process_run(*loc_args, **{"fetch_msg": kwargs.get("fetch_msg", False),
                                    "process_msg": False,
                                    "store_msg": kwargs.get("store_msg", False),
                                    "backup_msg": kwargs.get("backup_msg", False),
                                    "push_data": kwargs.get("push_data", False)})

    # Touch the indicator flag that processing of read1 has been completed
    utils.touch_indicator_file(os.path.join(dname, "initial_processing_completed.txt"))


def process_first_read(*args, **kwargs):
    """Processing to be performed after the first read and the index reads
    have been sequenced
    """
    dname, config = args[0:2]
    # Do bcl -> fastq conversion and demultiplexing using Casava1.8+
    if kwargs.get("casava", False):
        if not kwargs.get("no_casava_processing", False):
            logger2.info("Generating fastq.gz files for read 1 of {:s}".format(dname))

            # Touch the indicator flag that processing of read1 has been started
            utils.touch_indicator_file(os.path.join(dname, "first_read_processing_started.txt"))
            unaligned_dirs = _generate_fastq_with_casava(dname, config, r1=True)
 def test__is_started_first_read_processing(self):
     """The started flag for first read processing is detected"""
     root = self.rootdir
     started_flag = os.path.join(root, "first_read_processing_started.txt")

     # Without the flag file the step is not reported as started
     before = ifm._is_started_first_read_processing(root)
     self.assertFalse(before)

     # After touching the flag file it is
     utils.touch_indicator_file(started_flag)
     after = ifm._is_started_first_read_processing(root)
     self.assertTrue(after)
Example #21
0
 def test__is_started_initial_processing(self):
     """The started flag for initial processing is detected"""
     root = self.rootdir
     started_flag = os.path.join(root, "initial_processing_started.txt")

     # Without the flag file the step is not reported as started
     before = ifm._is_started_initial_processing(root)
     self.assertFalse(before)

     # After touching the flag file it is
     utils.touch_indicator_file(started_flag)
     after = ifm._is_started_initial_processing(root)
     self.assertTrue(after)