def process_first_read(*args, **kwargs):
    """Processing to be performed after the first read and the index reads
    have been sequenced.

    args: (dname, config, ...) -- the run directory and the post-processing
        configuration; the full tuple (plus the unaligned dir) is forwarded
        to _post_process_run
    kwargs: casava, no_casava_processing, fetch_msg, store_msg, backup_msg,
        push_data -- booleans selecting which steps and notifications run
    """
    dname, config = args[0:2]
    # Do bcl -> fastq conversion and demultiplexing using Casava1.8+
    if kwargs.get("casava", False):
        if not kwargs.get("no_casava_processing", False):
            logger2.info("Generating fastq.gz files for read 1 of {:s}".format(dname))
            # Touch the indicator flag that processing of read1 has been started
            utils.touch_indicator_file(os.path.join(dname, "first_read_processing_started.txt"))
            unaligned_dirs = _generate_fastq_with_casava(dname, config, r1=True)
            logger2.info("Done generating fastq.gz files for read 1 of {:s}".format(dname))
            # Extract the top barcodes from the undemultiplexed fraction
            for unaligned_dir in unaligned_dirs:
                if config["program"].get("extract_barcodes", None):
                    extract_top_undetermined_indexes(dname, unaligned_dir, config)
            # NOTE(review): the loop variable is immediately overwritten below,
            # so every iteration post-processes the same <dname>/Unaligned
            # directory rather than each entry of unaligned_dirs -- confirm
            # whether this is intended
            for unaligned_dir in unaligned_dirs:
                unaligned_dir = os.path.join(dname, "Unaligned")
                loc_args = args + (unaligned_dir,)
                _post_process_run(*loc_args,
                                  **{"fetch_msg": kwargs.get("fetch_msg", False),
                                     "process_msg": False,
                                     "store_msg": kwargs.get("store_msg", False),
                                     "backup_msg": kwargs.get("backup_msg", False),
                                     "push_data": kwargs.get("push_data", False)})
    # Touch the indicator flag that processing of read1 has been completed
    utils.touch_indicator_file(os.path.join(dname, "first_read_processing_completed.txt"))
def test__do_second_read_processing(self):
    """Verify the decision logic for launching second read processing"""
    self._runinfo(os.path.join(self.rootdir, "RunInfo.xml"))

    gaii_flag = os.path.join(self.rootdir,
                             "Basecalling_Netcopy_complete_READ2.txt")
    utils.touch_file(gaii_flag)
    self.assertTrue(_do_second_read_processing(self.rootdir),
                    "Processing should be run when last read GAII checkpoint exists")

    os.unlink(gaii_flag)
    self.assertFalse(_do_second_read_processing(self.rootdir),
                     "Processing should not be run before any reads are finished")

    utils.touch_file(os.path.join(self.rootdir,
                                  "Basecalling_Netcopy_complete_Read2.txt"))
    self.assertFalse(_do_second_read_processing(self.rootdir),
                     "Processing should not be run before last read is finished")

    utils.touch_file(os.path.join(self.rootdir,
                                  "Basecalling_Netcopy_complete_Read3.txt"))
    self.assertTrue(_do_second_read_processing(self.rootdir),
                    "Processing should be run when last read is finished")

    utils.touch_indicator_file(os.path.join(self.rootdir,
                                            "second_read_processing_started.txt"))
    self.assertFalse(_do_second_read_processing(self.rootdir),
                     "Processing should not be run when processing has started")
def test__is_started_second_read_processing(self):
    """Detect the 'second read processing started' indicator file"""
    flag = os.path.join(self.rootdir, "second_read_processing_started.txt")
    self.assertFalse(ifm._is_started_second_read_processing(self.rootdir))
    utils.touch_indicator_file(flag)
    self.assertTrue(ifm._is_started_second_read_processing(self.rootdir))
def process_first_read(*args, **kwargs):
    """Processing to be performed after the first read and the index reads
    have been sequenced.

    args: (dname, config, ...) -- run directory and configuration; forwarded
        (plus the unaligned dir) to _post_process_run
    kwargs: casava, store_msg -- control demultiplexing and message storage
    """
    dname, config = args[0:2]
    # Do bcl -> fastq conversion and demultiplexing using Casava1.8+
    if kwargs.get("casava",False):
        logger2.info("Generating fastq.gz files for read 1 of {:s}".format(dname))
        # Touch the indicator flag that processing of read1 has been started
        utils.touch_indicator_file(os.path.join(dname,"first_read_processing_started.txt"))
        unaligned_dir = _generate_fastq_with_casava(dname, config, r1=True)
        logger2.info("Done generating fastq.gz files for read 1 of {:s}".format(dname))
        # Extract the top barcodes from the undemultiplexed fraction
        if config["program"].get("extract_barcodes",None):
            extract_top_undetermined_indexes(dname, unaligned_dir, config)
        # Hand off to the common post-processing; process_msg and backup_msg
        # are hard-coded off at this stage
        loc_args = args + (unaligned_dir,)
        _post_process_run(*loc_args,
                          **{"fetch_msg": True,
                             "process_msg": False,
                             "store_msg": kwargs.get("store_msg",False),
                             "backup_msg": False})
    # Touch the indicator flag that processing of read1 has been completed
    utils.touch_indicator_file(os.path.join(dname,"first_read_processing_completed.txt"))
def test__do_second_read_processing(self):
    """Verify the decision logic for launching second read processing"""
    self._runinfo(os.path.join(self.rootdir, "RunInfo.xml"))

    gaii_flag = os.path.join(self.rootdir,
                             "Basecalling_Netcopy_complete_READ2.txt")
    utils.touch_file(gaii_flag)
    self.assertTrue(ifm._do_second_read_processing(self.rootdir),
                    "Processing should be run when last read GAII checkpoint exists")

    os.unlink(gaii_flag)
    self.assertFalse(ifm._do_second_read_processing(self.rootdir),
                     "Processing should not be run before any reads are finished")

    utils.touch_file(os.path.join(self.rootdir,
                                  "Basecalling_Netcopy_complete_Read2.txt"))
    self.assertFalse(ifm._do_second_read_processing(self.rootdir),
                     "Processing should not be run before last read is finished")

    utils.touch_file(os.path.join(self.rootdir,
                                  "Basecalling_Netcopy_complete_Read3.txt"))
    self.assertTrue(ifm._do_second_read_processing(self.rootdir),
                    "Processing should be run when last read is finished")

    utils.touch_indicator_file(os.path.join(self.rootdir,
                                            "second_read_processing_started.txt"))
    self.assertFalse(ifm._do_second_read_processing(self.rootdir),
                     "Processing should not be run when processing has started")
def test__is_initial_processing(self):
    """Detect initial processing in progress from indicator files"""
    started = os.path.join(self.rootdir, "initial_processing_started.txt")
    completed = os.path.join(self.rootdir, "initial_processing_completed.txt")

    self.assertFalse(_is_initial_processing(self.rootdir),
                     "No indicator files should not indicate processing in progress")
    utils.touch_indicator_file(started)
    self.assertTrue(_is_initial_processing(self.rootdir),
                    "Started indicator file should indicate processing in progress")
    utils.touch_indicator_file(completed)
    self.assertFalse(_is_initial_processing(self.rootdir),
                     "Completed indicator file should not indicate processing in progress")
def test__is_processing_first_read(self):
    """Detect first read processing in progress from indicator files"""
    started = os.path.join(self.rootdir, "first_read_processing_started.txt")
    completed = os.path.join(self.rootdir, "first_read_processing_completed.txt")

    self.assertFalse(ifm._is_processing_first_read(self.rootdir),
                     "No indicator files should not indicate processing in progress")
    utils.touch_indicator_file(started)
    self.assertTrue(ifm._is_processing_first_read(self.rootdir),
                    "Started indicator file should indicate processing in progress")
    utils.touch_indicator_file(completed)
    self.assertFalse(ifm._is_processing_first_read(self.rootdir),
                     "Completed indicator file should not indicate processing in progress")
def test__do_initial_processing(self):
    """Verify the decision logic for launching initial processing"""
    report = os.path.join(self.rootdir, "First_Base_Report.htm")

    self.assertFalse(_do_initial_processing(self.rootdir),
                     "Initial processing should not be run with missing indicator flags")
    utils.touch_file(report)
    self.assertTrue(_do_initial_processing(self.rootdir),
                    "Initial processing should be run after first base report creation")
    utils.touch_indicator_file(os.path.join(self.rootdir,
                                            "initial_processing_started.txt"))
    self.assertFalse(_do_initial_processing(self.rootdir),
                     "Initial processing should not be run when processing has been started")
    os.unlink(report)
    self.assertFalse(_do_initial_processing(self.rootdir),
                     "Initial processing should not be run when processing has been started "
                     "and missing first base report")
def test__is_initial_processing(self):
    """Detect initial processing in progress from indicator files"""
    started = os.path.join(self.rootdir, "initial_processing_started.txt")
    completed = os.path.join(self.rootdir, "initial_processing_completed.txt")

    self.assertFalse(ifm._is_initial_processing(self.rootdir),
                     "No indicator files should not indicate processing in progress")
    utils.touch_indicator_file(started)
    self.assertTrue(ifm._is_initial_processing(self.rootdir),
                    "Started indicator file should indicate processing in progress")
    utils.touch_indicator_file(completed)
    self.assertFalse(ifm._is_initial_processing(self.rootdir),
                     "Completed indicator file should not indicate processing in progress")
def initial_processing(*args, **kwargs):
    """Initial processing to be performed after the first base report
    has been generated.

    args: (dname, ...) -- the run directory; the full tuple (plus a None
        placeholder for the fastq dir, which does not exist yet) is
        forwarded to _post_process_run
    kwargs: store_msg -- whether to store the run message
    """
    dname = args[0]
    # Touch the indicator flag that initial processing has been started
    utils.touch_indicator_file(os.path.join(dname, "initial_processing_started.txt"))
    # Upload the necessary files; process_msg and backup_msg are hard-coded
    # off at this stage
    loc_args = args + (None, )
    _post_process_run(*loc_args,
                      **{"fetch_msg": True,
                         "process_msg": False,
                         "store_msg": kwargs.get("store_msg", False),
                         "backup_msg": False})
    # Touch the indicator flag that initial processing has been completed
    utils.touch_indicator_file(os.path.join(dname, "initial_processing_completed.txt"))
def process_second_read(*args, **kwargs):
    """Processing to be performed after all reads have been sequenced.

    args: (dname, config, ...) -- the run directory and the post-processing
        configuration
    kwargs: casava, no_casava_processing -- control the Casava1.8+
        demultiplexing step
    """
    dname, config = args[0:2]
    logger2.info("The instrument has finished dumping on directory %s" % dname)
    utils.touch_indicator_file(os.path.join(dname, "second_read_processing_started.txt"))
    _update_reported(config["msg_db"], dname)
    fastq_dir = None
    # Do bcl -> fastq conversion and demultiplexing using Casava1.8+
    if kwargs.get("casava", False):
        if not kwargs.get("no_casava_processing", False):
            logger2.info("Generating fastq.gz files for {:s}".format(dname))
            _generate_fastq_with_casava(dname, config)
            # Merge demultiplexing results into a single Unaligned folder
            utils.merge_demux_results(dname)
            #Move the demultiplexing results
            if config.has_key('mfs_dir'):
                fc_id = os.path.basename(dname)
                cl = ["rsync", \
                      "--checksum", \
                      "--recursive", \
                      "--links", \
                      "-D", \
                      "--partial", \
                      "--progress", \
                      "--prune-empty-dirs", \
                      os.path.join(dname, 'Unaligned'), \
                      os.path.join(config.get('mfs_dir'), fc_id) ]
                logger2.info("Synching Unaligned folder to MooseFS for run {}".format(fc_id))
                # NOTE(review): os.getcwd() returns an absolute path, so
                # os.path.join discards config['log_dir'] and logdir ends up
                # being the current working directory -- confirm this is
                # intended
                logdir = os.path.join(config.get('log_dir'), os.getcwd())
                rsync_out = os.path.join(logdir,"rsync_transfer.out")
                rsync_err = os.path.join(logdir,"rsync_transfer.err")
                # Append the command line and the rsync output to the logs
                with open(rsync_out, 'a') as ro:
                    # NOTE(review): 're' shadows the stdlib re module inside
                    # this with-block
                    with open(rsync_err, 'a') as re:
                        try:
                            ro.write("-----------\n{}\n".format(" ".join(cl)))
                            re.write("-----------\n{}\n".format(" ".join(cl)))
                            subprocess.check_call(cl, stdout=ro, stderr=re)
                        except subprocess.CalledProcessError, e:
                            # Best-effort sync: log the failure and continue
                            logger2.error("rsync transfer of Unaligned results FAILED")
def initial_processing(*args, **kwargs):
    """Initial processing to be performed after the first base report
    has been generated.

    args: (dname, config, ...) -- the run directory and the post-processing
        configuration
    """
    dname, config = args[0:2]
    # Touch the indicator flag that initial processing has been started
    utils.touch_indicator_file(os.path.join(dname, "initial_processing_started.txt"))
    # Copy the samplesheet to the run folder
    ss_file = samplesheet.run_has_samplesheet(dname, config)
    if ss_file:
        dst = os.path.join(dname,os.path.basename(ss_file))
        try:
            copyfile(ss_file,dst)
        except IOError, e:
            # Best-effort copy: log the failure and continue
            logger2.error("Error copying samplesheet {} from {} to {}: {}" \
                          "".format(os.path.basename(ss_file),
                                    os.path.dirname(ss_file),
                                    os.path.dirname(dst),
                                    e))
def test__do_first_read_processing(self):
    """Verify the decision logic for launching first read processing"""
    in_root = lambda fname: os.path.join(self.rootdir, fname)
    self._runinfo(in_root("RunInfo.xml"))

    self.assertFalse(ifm._do_first_read_processing(self.rootdir),
                     "Processing should not be run before first read is finished")
    utils.touch_file(in_root("Basecalling_Netcopy_complete_Read1.txt"))
    self.assertFalse(ifm._do_first_read_processing(self.rootdir),
                     "Processing should not be run before last index read is finished")
    utils.touch_file(in_root("Basecalling_Netcopy_complete_Read2.txt"))
    utils.touch_indicator_file(in_root("initial_processing_started.txt"))
    self.assertFalse(ifm._do_first_read_processing(self.rootdir),
                     "Processing should not be run when previous processing step is in progress")
    utils.touch_indicator_file(in_root("initial_processing_completed.txt"))
    self.assertTrue(ifm._do_first_read_processing(self.rootdir),
                    "Processing should be run when last index read is finished")
    utils.touch_indicator_file(in_root("first_read_processing_started.txt"))
    self.assertFalse(ifm._do_first_read_processing(self.rootdir),
                     "Processing should not be run when processing has started")
def test__do_first_read_processing(self):
    """Verify the decision logic for launching first read processing"""
    in_root = lambda fname: os.path.join(self.rootdir, fname)
    self._runinfo(in_root("RunInfo.xml"))

    self.assertFalse(_do_first_read_processing(self.rootdir),
                     "Processing should not be run before first read is finished")
    utils.touch_file(in_root("Basecalling_Netcopy_complete_Read1.txt"))
    self.assertFalse(_do_first_read_processing(self.rootdir),
                     "Processing should not be run before last index read is finished")
    utils.touch_file(in_root("Basecalling_Netcopy_complete_Read2.txt"))
    utils.touch_indicator_file(in_root("initial_processing_started.txt"))
    self.assertFalse(_do_first_read_processing(self.rootdir),
                     "Processing should not be run when previous processing step is in progress")
    utils.touch_indicator_file(in_root("initial_processing_completed.txt"))
    self.assertTrue(_do_first_read_processing(self.rootdir),
                    "Processing should be run when last index read is finished")
    utils.touch_indicator_file(in_root("first_read_processing_started.txt"))
    self.assertFalse(_do_first_read_processing(self.rootdir),
                     "Processing should not be run when processing has started")
def test__do_initial_processing(self):
    """Verify the decision logic for launching initial processing"""
    report = os.path.join(self.rootdir, "First_Base_Report.htm")

    self.assertFalse(ifm._do_initial_processing(self.rootdir),
                     "Initial processing should not be run with missing indicator flags")
    utils.touch_file(report)
    self.assertTrue(ifm._do_initial_processing(self.rootdir),
                    "Initial processing should be run after first base report creation")
    utils.touch_indicator_file(os.path.join(self.rootdir,
                                            "initial_processing_started.txt"))
    self.assertFalse(ifm._do_initial_processing(self.rootdir),
                     "Initial processing should not be run when processing has been started")
    os.unlink(report)
    self.assertFalse(ifm._do_initial_processing(self.rootdir),
                     "Initial processing should not be run when processing has been started "
                     "and missing first base report")
def process_second_read(*args, **kwargs):
    """Processing to be performed after all reads have been sequenced.

    args: (dname, config, ...) -- the run directory and the post-processing
        configuration; the full tuple (plus the fastq dir) is forwarded to
        _post_process_run
    kwargs: casava, qseq, fastq, remove_qseq, fetch_msg, process_msg,
        store_msg, backup_msg -- booleans selecting which conversion steps
        and notifications run
    """
    dname, config = args[0:2]
    logger2.info("The instrument has finished dumping on directory %s" % dname)
    utils.touch_indicator_file(os.path.join(dname,"second_read_processing_started.txt"))
    _update_reported(config["msg_db"], dname)
    fastq_dir = None
    # Do bcl -> fastq conversion and demultiplexing using Casava1.8+
    if kwargs.get("casava",False):
        logger2.info("Generating fastq.gz files for {:s}".format(dname))
        _generate_fastq_with_casava(dname, config)
    else:
        # Pre-Casava pipeline: samplesheets, then optional qseq/fastq steps
        _process_samplesheets(dname, config)
        if kwargs.get("qseq",True):
            logger2.info("Generating qseq files for {:s}".format(dname))
            _generate_qseq(get_qseq_dir(dname), config)
        if kwargs.get("fastq",True):
            logger2.info("Generating fastq files for {:s}".format(dname))
            fastq_dir = _generate_fastq(dname, config)
            # Optionally reclaim disk space once fastq files exist
            if kwargs.get("remove_qseq",False):
                _clean_qseq(get_qseq_dir(dname), fastq_dir)
            _calculate_md5(fastq_dir)
    # Call the post_processing method
    loc_args = args + (fastq_dir,)
    _post_process_run(*loc_args,
                      **{"fetch_msg": kwargs.get("fetch_msg",True),
                         "process_msg": kwargs.get("process_msg",True),
                         "store_msg": kwargs.get("store_msg",True),
                         "backup_msg": kwargs.get("backup_msg",False)})
    # Update the reported database after successful processing
    _update_reported(config["msg_db"], dname)
    utils.touch_indicator_file(os.path.join(dname,"second_read_processing_completed.txt"))
def test__is_started_second_read_processing(self):
    """Detect the 'second read processing started' indicator file"""
    flag = os.path.join(self.rootdir, "second_read_processing_started.txt")
    self.assertFalse(_is_started_second_read_processing(self.rootdir))
    utils.touch_indicator_file(flag)
    self.assertTrue(_is_started_second_read_processing(self.rootdir))
def test__is_started_initial_processing(self):
    """Detect the 'initial processing started' indicator file"""
    flag = os.path.join(self.rootdir, "initial_processing_started.txt")
    self.assertFalse(_is_started_initial_processing(self.rootdir))
    utils.touch_indicator_file(flag)
    self.assertTrue(_is_started_initial_processing(self.rootdir))
hiseq_ssheet = os.path.join(dname,'{}.csv'.format(_get_flowcell_id(dname))) mrun.write_hiseq_samplesheet(hiseq_ssheet) # If the module wasn't loaded, there's nothing we can do, so warn else: logger2.error("The necessary dependencies for processing MiSeq runs with CASAVA could not be loaded") # Upload the necessary files loc_args = args + (None, ) _post_process_run(*loc_args, **{"fetch_msg": kwargs.get("fetch_msg", False), "process_msg": False, "store_msg": kwargs.get("store_msg", False), "backup_msg": kwargs.get("backup_msg", False), "push_data": kwargs.get("push_data", False)}) # Touch the indicator flag that processing of read1 has been completed utils.touch_indicator_file(os.path.join(dname, "initial_processing_completed.txt")) def process_first_read(*args, **kwargs): """Processing to be performed after the first read and the index reads have been sequenced """ dname, config = args[0:2] # Do bcl -> fastq conversion and demultiplexing using Casava1.8+ if kwargs.get("casava", False): if not kwargs.get("no_casava_processing", False): logger2.info("Generating fastq.gz files for read 1 of {:s}".format(dname)) # Touch the indicator flag that processing of read1 has been started utils.touch_indicator_file(os.path.join(dname, "first_read_processing_started.txt")) unaligned_dirs = _generate_fastq_with_casava(dname, config, r1=True)
def test__is_started_first_read_processing(self):
    """Detect the 'first read processing started' indicator file"""
    flag = os.path.join(self.rootdir, "first_read_processing_started.txt")
    self.assertFalse(ifm._is_started_first_read_processing(self.rootdir))
    utils.touch_indicator_file(flag)
    self.assertTrue(ifm._is_started_first_read_processing(self.rootdir))
def test__is_started_initial_processing(self):
    """Detect the 'initial processing started' indicator file"""
    flag = os.path.join(self.rootdir, "initial_processing_started.txt")
    self.assertFalse(ifm._is_started_initial_processing(self.rootdir))
    utils.touch_indicator_file(flag)
    self.assertTrue(ifm._is_started_initial_processing(self.rootdir))