def test_checkforrun(self):
    """Check samplesheet lookup for two flowcell directories.

    The samplesheet search path is the directory holding ``self.ss_file``.
    The first directory is expected to yield a result, the second is
    expected to yield ``None``.
    """
    search_config = {
        "samplesheet_directories": [os.path.dirname(self.ss_file)],
    }

    found = samplesheet.run_has_samplesheet(
        "fake/101007_80HM7ABXX", search_config, False)
    assert found is not None

    missing = samplesheet.run_has_samplesheet(
        "fake/101007_NOPEXX", search_config, False)
    assert missing is None
def test_checkforrun(self):
    """Check for the presence of runs in an Illumina SampleSheet.

    Each case pairs a flowcell directory with whether the lookup is
    expected to return something other than ``None``.
    """
    config = {"samplesheet_directories": [os.path.dirname(self.ss_file)]}
    cases = (
        ("fake/101007_80HM7ABXX", True),
        ("fake/101007_NOPEXX", False),
    )
    for fcdir, expect_hit in cases:
        ss = samplesheet.run_has_samplesheet(fcdir, config, False)
        if expect_hit:
            assert ss is not None
        else:
            assert ss is None
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Process every finished run directory that has not been reported yet.

    A directory is handled when it exists, is absent from the reported
    set read from ``config["msg_db"]``, and ``_is_finished_dumping``
    accepts it. Handling means: record it as reported, convert a CSV
    samplesheet to ``run_info.yaml`` when one is found, optionally
    generate qseq and/or fastq files, and finally send the "process"
    and/or "store" finished messages.
    """
    already_reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        # Skip anything that is not a directory or was handled before.
        if not os.path.isdir(dname) or dname in already_reported:
            continue
        if not _is_finished_dumping(dname):
            continue

        log.info("The instrument has finished dumping on directory %s" % dname)
        # Recorded before the processing steps below are run.
        _update_reported(config["msg_db"], dname)

        ss_file = samplesheet.run_has_samplesheet(dname, config)
        if ss_file:
            out_file = os.path.join(dname, "run_info.yaml")
            log.info("CSV Samplesheet %s found, converting to %s" %
                     (ss_file, out_file))
            samplesheet.csv2yaml(ss_file, out_file)

        if qseq:
            log.info("Generating qseq files for %s" % dname)
            _generate_qseq(get_qseq_dir(dname), config)
        if fastq:
            log.info("Generating fastq files for %s" % dname)
            _generate_fastq(dname, config)

        store_files, process_files = _files_to_copy(dname)
        if process_msg:
            finished_message(config["msg_process_tag"], dname,
                             process_files, amqp_config)
        if store_msg:
            finished_message(config["msg_store_tag"], dname,
                             store_files, amqp_config)
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Search for any new directories that have not been reported.

    For each unreported directory on which dumping has finished, the
    directory is marked as reported, its CSV samplesheet (if any) is
    converted to ``run_info.yaml``, qseq/fastq files are generated when
    requested, and the requested finished messages are sent.
    """
    seen = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        # Short-circuit order matches the original nested checks.
        is_new_and_done = (os.path.isdir(dname)
                           and dname not in seen
                           and _is_finished_dumping(dname))
        if is_new_and_done:
            log.info(
                "The instrument has finished dumping on directory %s" % dname)
            _update_reported(config["msg_db"], dname)

            csv_sheet = samplesheet.run_has_samplesheet(dname, config)
            if csv_sheet:
                yaml_out = os.path.join(dname, "run_info.yaml")
                log.info("CSV Samplesheet %s found, converting to %s" %
                         (csv_sheet, yaml_out))
                samplesheet.csv2yaml(csv_sheet, yaml_out)

            if qseq:
                log.info("Generating qseq files for %s" % dname)
                _generate_qseq(get_qseq_dir(dname), config)
            if fastq:
                log.info("Generating fastq files for %s" % dname)
                _generate_fastq(dname, config)

            store_files, process_files = _files_to_copy(dname)
            # The process message is sent before the store message; the
            # tag is only looked up when its message is requested.
            notifications = (
                (process_msg, "msg_process_tag", process_files),
                (store_msg, "msg_store_tag", store_files),
            )
            for requested, tag_key, files in notifications:
                if requested:
                    finished_message(config[tag_key], dname, files,
                                     amqp_config)
def _process_samplesheets(dname, config):
    """Convert the run's CSV samplesheet, if one is found, to YAML.

    The output is written to ``run_info.yaml`` inside ``dname``. Nothing
    happens when no samplesheet is found for the run.
    """
    csv_sheet = samplesheet.run_has_samplesheet(dname, config)
    if not csv_sheet:
        return
    yaml_out = os.path.join(dname, "run_info.yaml")
    log.info("CSV Samplesheet %s found, converting to %s" %
             (csv_sheet, yaml_out))
    samplesheet.csv2yaml(csv_sheet, yaml_out)
def _process_samplesheets(dname, config):
    """Process Illumina samplesheets into YAML files for post-processing.

    Looks up the samplesheet for ``dname`` and, when one is found,
    converts it to ``<dname>/run_info.yaml``.
    """
    sheet_path = samplesheet.run_has_samplesheet(dname, config)
    if not sheet_path:
        # No samplesheet found for this run: nothing to convert.
        return
    target = os.path.join(dname, "run_info.yaml")
    message = "CSV Samplesheet %s found, converting to %s" % (sheet_path,
                                                              target)
    logger2.info(message)
    samplesheet.csv2yaml(sheet_path, target)
def _generate_fastq_with_casava(fc_dir, config, r1=False):
    """Configure demultiplexing to fastq.gz for the current flowcell using
    CASAVA (>1.8).

    Builds the ``configureBclToFastq.pl`` command line from ``config`` and,
    when ``r1`` is true, runs it with stdout/stderr redirected to
    ``configureBclToFastq.out`` / ``configureBclToFastq.err`` in ``fc_dir``.

    Args:
        fc_dir: Flowcell run directory.
        config: Configuration dict; reads ``config["program"]["casava"]``
            and the ``mismatches``, ``num_cores``, ``ignore-missing-stats``,
            ``ignore-missing-bcl`` and ``ignore-missing-control`` entries
            of ``config["algorithm"]``.
        r1: When true, the configuration script is executed.

    Raises:
        subprocess.CalledProcessError: If the configuration script exits
            with a non-zero status (the failure is logged first).
    """
    basecall_dir = os.path.join(fc_dir, "Data", "Intensities", "BaseCalls")
    casava_dir = config["program"].get("casava")
    unaligned_dir = os.path.join(fc_dir, "Unaligned")
    samplesheet_file = samplesheet.run_has_samplesheet(fc_dir, config)
    num_mismatches = config["algorithm"].get("mismatches", 1)
    # NOTE(review): num_cores, casava_out and casava_err are not used in the
    # code shown here -- presumably consumed by a later make step; confirm
    # before removing.
    num_cores = config["algorithm"].get("num_cores", 1)
    im_stats = config["algorithm"].get("ignore-missing-stats", False)
    im_bcl = config["algorithm"].get("ignore-missing-bcl", False)
    im_control = config["algorithm"].get("ignore-missing-control", False)

    # Write to log files
    configure_out = os.path.join(fc_dir, "configureBclToFastq.out")
    configure_err = os.path.join(fc_dir, "configureBclToFastq.err")
    casava_out = os.path.join(fc_dir, "bclToFastq_R{:d}.out".format(2 - int(r1)))
    casava_err = os.path.join(fc_dir, "bclToFastq_R{:d}.err".format(2 - int(r1)))

    cl = [os.path.join(casava_dir, "configureBclToFastq.pl")]
    cl.extend(["--input-dir", basecall_dir])
    cl.extend(["--output-dir", unaligned_dir])
    cl.extend(["--mismatches", str(num_mismatches)])
    cl.extend(["--fastq-cluster-count", "0"])
    if samplesheet_file is not None:
        cl.extend(["--sample-sheet", samplesheet_file])
    if im_stats:
        cl.append("--ignore-missing-stats")
    if im_bcl:
        cl.append("--ignore-missing-bcl")
    if im_control:
        cl.append("--ignore-missing-control")

    bm = _get_bases_mask(fc_dir)
    if bm is not None:
        cl.extend(["--use-bases-mask", bm])

    if r1:
        # Run configuration script
        logger2.info("Configuring BCL to Fastq conversion")
        logger2.debug(cl)
        try:
            # The context managers close both log files on success and on
            # failure; previously they leaked when check_call raised.
            with open(configure_out, 'w') as co, open(configure_err, 'w') as ce:
                subprocess.check_call(cl, stdout=co, stderr=ce)
        except subprocess.CalledProcessError as e:
            logger2.error("Configuring BCL to Fastq conversion for {:s} FAILED "
                          "(exit code {}), please check log files {:s}, {:s}".format(
                              fc_dir, str(e.returncode), configure_out,
                              configure_err))
            # Bare raise keeps the original traceback.
            raise
def initial_processing(*args, **kwargs):
    """Initial processing to be performed after the first base report.

    The first two positional arguments are taken as the run directory
    (``dname``) and the configuration (``config``); unpacking raises
    ``ValueError`` when fewer than two are supplied. Keyword arguments
    are accepted but not used in the code shown here.

    Steps:
      1. Touch ``initial_processing_started.txt`` in the run directory.
      2. If a samplesheet is found for the run, copy it into the run
         directory under its own base name. A failed copy is logged and
         otherwise ignored (best effort).
    """
    dname, config = args[0:2]

    # Touch the indicator flag that processing of read1 has been started
    utils.touch_indicator_file(
        os.path.join(dname, "initial_processing_started.txt"))

    # Copy the samplesheet to the run folder
    ss_file = samplesheet.run_has_samplesheet(dname, config)
    if ss_file:
        dst = os.path.join(dname, os.path.basename(ss_file))
        try:
            copyfile(ss_file, dst)
        except IOError as e:
            # "as" form is valid on Python 2.6+ and Python 3, unlike the
            # comma form it replaces.
            logger2.error("Error copying samplesheet {} from {} to {}: {}".format(
                os.path.basename(ss_file),
                os.path.dirname(ss_file),
                os.path.dirname(dst),
                e))
def _generate_fastq_with_casava(fc_dir, config, r1=False):
    """Perform demultiplexing and generate fastq.gz files for the current
    flowcell using CASAVA (>1.8).

    When ``r1`` is true, ``configureBclToFastq.pl`` is run first and
    ``make`` is then invoked with the ``r1`` target; otherwise ``make`` is
    invoked without a target. ``make`` runs inside the ``Unaligned``
    directory of the flowcell.

    Args:
        fc_dir: Flowcell run directory.
        config: Configuration dict; reads ``config["program"]["casava"]``
            and ``mismatches`` / ``num_cores`` from ``config["algorithm"]``.
        r1: Selects the configure-and-make-r1 path described above.

    Raises:
        subprocess.CalledProcessError: If the configuration script or
            ``make`` exits with a non-zero status.
    """
    basecall_dir = os.path.join(fc_dir, "Data", "Intensities", "BaseCalls")
    casava_dir = config["program"].get("casava")
    unaligned_dir = os.path.join(fc_dir, "Unaligned")
    samplesheet_file = samplesheet.run_has_samplesheet(fc_dir, config)
    num_mismatches = config["algorithm"].get("mismatches", 1)
    num_cores = config["algorithm"].get("num_cores", 1)

    cl = [os.path.join(casava_dir, "configureBclToFastq.pl")]
    cl.extend(["--input-dir", basecall_dir])
    cl.extend(["--output-dir", unaligned_dir])
    # Only pass --sample-sheet when a samplesheet was found: a None entry
    # in the argument list makes subprocess fail with a TypeError.
    if samplesheet_file is not None:
        cl.extend(["--sample-sheet", samplesheet_file])
    cl.extend(["--mismatches", str(num_mismatches)])
    options = ["--fastq-cluster-count", "0",
               "--ignore-missing-stats",
               "--ignore-missing-bcl",
               "--ignore-missing-control"]
    cl.extend(options)

    if r1:
        # Run configuration script
        logger2.info("Configuring BCL to Fastq conversion")
        logger2.debug(cl)
        subprocess.check_call(cl)

    # Go to <Unaligned> folder
    with utils.chdir(unaligned_dir):
        # Perform make
        cl = ["nohup", "make", "-j", str(num_cores)]
        if r1:
            cl.append("r1")

        logger2.info("Demultiplexing and converting bcl to fastq.gz")
        logger2.debug(cl)
        subprocess.check_call(cl)

    logger2.debug("Done")