def search_for_new(config, config_file, post_config_file, fetch_msg,
                   process_msg, store_msg, backup_msg, qseq, fastq,
                   remove_qseq, compress_fastq, casava):
    """Process every run directory that has not been reported yet.

    A directory from ``_get_directories(config)`` is handled only when it
    exists on disk and no entry of the reported database
    (``config["msg_db"]``) starts with its path.

    With ``casava`` set, a directory whose first read is complete gets a
    read-1 fastq.gz generation followed by a post-processing call that
    always uses ``fetch_msg=True``, ``process_msg=False`` and
    ``backup_msg=False`` (only ``store_msg`` is forwarded from the caller).

    Once the whole run is complete the directory is recorded as reported,
    samplesheets are processed, the requested qseq / fastq / casava
    conversions are executed and post-processing is called with the
    caller's message flags.

    ``remove_qseq`` and ``compress_fastq`` are only consulted when
    ``fastq`` is truthy.
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if not os.path.isdir(dname):
            continue
        if any(entry.startswith(dname) for entry in reported):
            continue

        # Tag every log record emitted below with the run name, which is
        # convenient for the "Subject" of e-mail notifications.
        def _tag_run(record):
            record.extra['run'] = os.path.basename(dname)

        with logbook.Processor(_tag_run):
            if casava and _is_finished_dumping_read_1(dname):
                logger2.info("Generating fastq.gz files for read 1 of %s" % dname)
                _generate_fastq_with_casava(dname, config, r1=True)
                # No fastq directory is handed over for the read-1 pass.
                _post_process_run(dname, config, config_file,
                                  None, post_config_file,
                                  fetch_msg=True, process_msg=False,
                                  store_msg=store_msg, backup_msg=False)

            if _is_finished_dumping(dname):
                logger2.info("The instrument has finished dumping on directory %s" % dname)
                _update_reported(config["msg_db"], dname)
                _process_samplesheets(dname, config)

                if qseq:
                    logger2.info("Generating qseq files for %s" % dname)
                    _generate_qseq(get_qseq_dir(dname), config)

                if fastq:
                    logger2.info("Generating fastq files for %s" % dname)
                    fastq_dir = _generate_fastq(dname, config)
                    if remove_qseq:
                        _clean_qseq(get_qseq_dir(dname), fastq_dir)
                    _calculate_md5(fastq_dir)
                    if compress_fastq:
                        _compress_fastq(fastq_dir, config)
                else:
                    fastq_dir = None

                if casava:
                    logger2.info("Generating fastq.gz files for %s" % dname)
                    _generate_fastq_with_casava(dname, config)

                _post_process_run(dname, config, config_file,
                                  fastq_dir, post_config_file,
                                  fetch_msg, process_msg, store_msg, backup_msg)

                # Record the run again now that processing went through.
                _update_reported(config["msg_db"], dname)

        # Refresh the reported entries in case the database changed while
        # this directory was being processed.
        # NOTE(review): nesting level of this re-read was reconstructed from
        # a whitespace-collapsed source -- confirm against the repository.
        reported = _read_reported(config["msg_db"])
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Handle run directories that are complete but not yet reported.

    For each directory from ``_get_directories(config)`` that exists, is not
    listed in the reported database (``config["msg_db"]``) and has finished
    dumping, this:

    * records the directory as reported,
    * converts a CSV samplesheet (if one is found) to ``run_info.yaml``
      inside the run directory,
    * generates qseq and/or fastq files when ``qseq`` / ``fastq`` are truthy,
    * sends a "finished" message for the process and/or store file lists
      when ``process_msg`` / ``store_msg`` are truthy.
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if not os.path.isdir(dname) or dname in reported:
            continue
        if not _is_finished_dumping(dname):
            continue

        log.info("The instrument has finished dumping on directory %s" % dname)
        _update_reported(config["msg_db"], dname)

        ss_file = samplesheet.run_has_samplesheet(dname, config)
        if ss_file:
            out_file = os.path.join(dname, "run_info.yaml")
            log.info("CSV Samplesheet %s found, converting to %s" %
                     (ss_file, out_file))
            samplesheet.csv2yaml(ss_file, out_file)

        if qseq:
            log.info("Generating qseq files for %s" % dname)
            _generate_qseq(get_qseq_dir(dname), config)
        if fastq:
            log.info("Generating fastq files for %s" % dname)
            _generate_fastq(dname, config)

        store_files, process_files = _files_to_copy(dname)
        # (enabled?, config key holding the message tag, files to announce)
        notifications = (
            (process_msg, "msg_process_tag", process_files),
            (store_msg, "msg_store_tag", store_files),
        )
        for enabled, tag_key, files in notifications:
            if enabled:
                finished_message(config[tag_key], dname, files, amqp_config)
def search_for_new(config, config_file, post_config_file, process_msg,
                   store_msg, qseq, fastq):
    """Process finished run directories missing from the reported database.

    Each directory from ``_get_directories(config)`` that exists on disk, is
    not in the reported database (``config["msg_db"]``) and has finished
    dumping is recorded as reported, has its samplesheets processed, gets
    qseq / fastq files generated when ``qseq`` / ``fastq`` are truthy, and is
    finally handed to ``_post_process_run`` together with the fastq
    directory (``None`` when no fastq files were generated).
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if not os.path.isdir(dname) or dname in reported:
            continue
        if not _is_finished_dumping(dname):
            continue

        # Tag every log record emitted below with the run name, which is
        # convenient for the "Subject" of e-mail notifications.
        def _tag_run(record):
            record.extra['run'] = os.path.basename(dname)

        with logbook.Processor(_tag_run):
            logger2.info(
                "The instrument has finished dumping on directory %s" % dname)
            _update_reported(config["msg_db"], dname)
            _process_samplesheets(dname, config)

            if qseq:
                logger2.info("Generating qseq files for %s" % dname)
                _generate_qseq(get_qseq_dir(dname), config)

            if fastq:
                logger2.info("Generating fastq files for %s" % dname)
                fastq_dir = _generate_fastq(dname, config)
            else:
                fastq_dir = None

            _post_process_run(dname, config, config_file, fastq_dir,
                              post_config_file, process_msg, store_msg)
def search_for_new(config, amqp_config, post_config_file, process_msg,
                   store_msg, qseq, fastq):
    """Look for finished, not-yet-reported run directories and process them.

    A directory qualifies when it exists, is absent from the reported
    database (``config["msg_db"]``) and has finished dumping.  Qualifying
    directories are recorded as reported, their samplesheets are processed,
    qseq / fastq files are generated on request, and ``_post_process_run``
    is called with the resulting fastq directory (``None`` when ``fastq``
    is falsy).
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        is_new_run = os.path.isdir(dname) and dname not in reported
        if is_new_run and _is_finished_dumping(dname):
            # Adds the run name to each log record; handy for the "Subject"
            # line of e-mail notifications.
            def _add_run_name(record):
                record.extra['run'] = os.path.basename(dname)

            with logbook.Processor(_add_run_name):
                log.info("The instrument has finished dumping on directory %s" % dname)
                _update_reported(config["msg_db"], dname)
                _process_samplesheets(dname, config)

                if qseq:
                    log.info("Generating qseq files for %s" % dname)
                    _generate_qseq(get_qseq_dir(dname), config)

                fastq_dir = None
                if fastq:
                    log.info("Generating fastq files for %s" % dname)
                    fastq_dir = _generate_fastq(dname, config)

                _post_process_run(dname, config, amqp_config, fastq_dir,
                                  post_config_file, process_msg, store_msg)
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Find finished run directories that were never reported and handle them.

    For every existing directory from ``_get_directories(config)`` that is
    not in the reported database (``config["msg_db"]``) and has finished
    dumping: mark it reported, convert a CSV samplesheet to
    ``run_info.yaml`` when one is present, generate qseq / fastq files if
    ``qseq`` / ``fastq`` are truthy, and send the process / store "finished"
    messages if ``process_msg`` / ``store_msg`` are truthy.
    """
    already_reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if (os.path.isdir(dname)
                and dname not in already_reported
                and _is_finished_dumping(dname)):
            log.info(
                "The instrument has finished dumping on directory %s" % dname)
            _update_reported(config["msg_db"], dname)

            csv_sheet = samplesheet.run_has_samplesheet(dname, config)
            if csv_sheet:
                yaml_path = os.path.join(dname, "run_info.yaml")
                log.info("CSV Samplesheet %s found, converting to %s" %
                         (csv_sheet, yaml_path))
                samplesheet.csv2yaml(csv_sheet, yaml_path)

            if qseq:
                log.info("Generating qseq files for %s" % dname)
                _generate_qseq(get_qseq_dir(dname), config)
            if fastq:
                log.info("Generating fastq files for %s" % dname)
                _generate_fastq(dname, config)

            # _files_to_copy returns the store list first, then the
            # process list.
            store_files, process_files = _files_to_copy(dname)
            if process_msg:
                finished_message(config["msg_process_tag"], dname,
                                 process_files, amqp_config)
            if store_msg:
                finished_message(config["msg_store_tag"], dname,
                                 store_files, amqp_config)
def process_second_read(*args, **kwargs):
    """Processing to be performed after all reads have been sequenced.

    The first two positional arguments are the run directory and the
    configuration dictionary; all positional arguments, followed by the
    fastq directory, are forwarded to ``_post_process_run``.

    Recognised keyword options (with the defaults used when absent):
    ``casava`` (False), ``qseq`` (True), ``fastq`` (True),
    ``remove_qseq`` (False), ``fetch_msg`` (True), ``process_msg`` (True),
    ``store_msg`` (True) and ``backup_msg`` (False).

    Indicator files ``second_read_processing_started.txt`` and
    ``second_read_processing_completed.txt`` are touched in the run
    directory at the beginning and at the end of the processing.
    """
    dname, config = args[0:2]
    option = kwargs.get

    logger2.info("The instrument has finished dumping on directory %s" % dname)
    utils.touch_indicator_file(
        os.path.join(dname, "second_read_processing_started.txt"))
    _update_reported(config["msg_db"], dname)

    fastq_dir = None
    if not option("casava", False):
        _process_samplesheets(dname, config)
        if option("qseq", True):
            logger2.info("Generating qseq files for {:s}".format(dname))
            _generate_qseq(get_qseq_dir(dname), config)
        if option("fastq", True):
            logger2.info("Generating fastq files for {:s}".format(dname))
            fastq_dir = _generate_fastq(dname, config)
            if option("remove_qseq", False):
                _clean_qseq(get_qseq_dir(dname), fastq_dir)
            _calculate_md5(fastq_dir)
    else:
        # bcl -> fastq conversion and demultiplexing using Casava1.8+;
        # fastq_dir stays None on this path.
        logger2.info("Generating fastq.gz files for {:s}".format(dname))
        _generate_fastq_with_casava(dname, config)

    # Hand over to post-processing with the fastq directory appended to the
    # caller's positional arguments.
    message_flags = {
        "fetch_msg": option("fetch_msg", True),
        "process_msg": option("process_msg", True),
        "store_msg": option("store_msg", True),
        "backup_msg": option("backup_msg", False),
    }
    _post_process_run(*(args + (fastq_dir,)), **message_flags)

    # Update the reported database after successful processing
    _update_reported(config["msg_db"], dname)
    utils.touch_indicator_file(
        os.path.join(dname, "second_read_processing_completed.txt"))
with open(rsync_out, 'a') as ro: with open(rsync_err, 'a') as re: try: ro.write("-----------\n{}\n".format(" ".join(cl))) re.write("-----------\n{}\n".format(" ".join(cl))) subprocess.check_call(cl, stdout=ro, stderr=re) except subprocess.CalledProcessError, e: logger2.error("rsync transfer of Unaligned results FAILED") else: _process_samplesheets(dname, config) if kwargs.get("qseq", True): logger2.info("Generating qseq files for {:s}".format(dname)) _generate_qseq(get_qseq_dir(dname), config) if kwargs.get("fastq", True): logger2.info("Generating fastq files for {:s}".format(dname)) fastq_dir = _generate_fastq(dname, config) if kwargs.get("remove_qseq", False): _clean_qseq(get_qseq_dir(dname), fastq_dir) _calculate_md5(fastq_dir) # Call the post_processing method loc_args = args + (fastq_dir,) _post_process_run(*loc_args, **{"fetch_msg": kwargs.get("fetch_msg", False), "process_msg": kwargs.get("process_msg", False), "store_msg": kwargs.get("store_msg", False), "backup_msg": kwargs.get("backup_msg", False),