def _run_qc_fastqc(in_files, data, out_dir):
    """Run FastQC on downsampled copies of the input FASTQ files.

    Args:
        in_files: Sequence holding the first FASTQ file and, optionally, its
            mate. A missing or falsy second entry is treated as "no mate".
        data: Sample dictionary, handed to both ``fastq.downsample`` and
            ``fastqc.run``.
        out_dir: Base output directory. Each file gets a sub-directory named
            after its base name with the extension stripped by
            ``utils.splitext_plus``.
    """
    first = in_files[0]
    # Tolerate a one-element input instead of raising IndexError; the loop
    # below already skips falsy entries, so a missing mate is expected.
    second = in_files[1] if len(in_files) > 1 else None
    # Pass ``data`` as the third argument, matching every other
    # ``fastq.downsample`` call in this file.
    # NOTE(review): confirm against the installed ``fastq.downsample``.
    in_files = fastq.downsample(first, second, data, N=5000000)
    for fastq_file in in_files:
        if not fastq_file:
            continue
        base_name = utils.splitext_plus(os.path.basename(fastq_file))[0]
        fastqc.run(fastq_file, data, os.path.join(out_dir, base_name))
def _fastqc(input_file, out_dir):
    """Run ``fastqc`` on a downsampled copy of ``input_file``.

    Nothing is run when ``file_exists(out_dir)`` is already true; in that
    case ``out_dir`` is returned exactly as given. Otherwise the directory is
    created, FastQC is executed with ``--extract`` and the absolute path of
    the output directory is returned.
    """
    if file_exists(out_dir):
        return out_dir

    # Minimal configuration stub handed to the downsampler.
    data = {'config': {'algorithm': {}}}
    dw_file, _ = fastq.downsample(input_file, None, data, int(1e7))

    out_dir = os.path.abspath(out_dir)
    safe_makedir(out_dir)

    # Format once; the same command line is both executed and logged.
    command = "fastqc {dw_file} --extract -o {out_dir}".format(
        dw_file=dw_file, out_dir=out_dir)
    do.run(command, "Doing Fastqc %s" % input_file)
    logger.my_logger.debug(command)
    return out_dir
def process_lane(item):
    """Prepare a lane, reducing the number of reads for a test run.

    The input files are looked up with ``get_fastq_files``. When
    ``item["test_run"]`` is truthy they are downsampled to ``NUM_DOWNSAMPLE``:
    BAM input goes through ``bam.downsample``, anything else through
    ``fastq.downsample`` with ``quick=True``.

    Args:
        item: Lane dictionary; reads ``item["rgnames"]["lane"]`` and the
            optional ``"test_run"`` flag, and is updated in place.

    Returns:
        A one-element list containing ``item`` with ``item["files"]`` set to
        the ``(file1, file2)`` tuple.
    """
    NUM_DOWNSAMPLE = 10000
    logger.debug("Preparing %s" % item["rgnames"]["lane"])
    file1, file2 = get_fastq_files(item)
    if item.get("test_run", False):
        if bam.is_bam(file1):
            file1 = bam.downsample(file1, item, NUM_DOWNSAMPLE)
            # Only file1 is downsampled here, so drop any second file rather
            # than pairing the new BAM with a stale entry. This matches the
            # BAM branch of prepare_sample.
            file2 = None
        else:
            file1, file2 = fastq.downsample(file1, file2, item,
                                            NUM_DOWNSAMPLE, quick=True)
    item["files"] = (file1, file2)
    return [item]
def prepare_sample(data):
    """Prepare a sample to be run, downsampling its reads for a test run.

    Input files come from ``get_fastq_files``. If ``data["test_run"]`` is
    truthy, BAM input is reduced with ``bam.downsample`` (and the second file
    is cleared), while other input is reduced with ``fastq.downsample``.

    Returns a nested list ``[[data]]`` with ``data["files"]`` set to the
    two-element list of resulting files.
    """
    read_limit = 10000
    sample_name = data["rgnames"]["sample"]
    logger.debug("Preparing %s" % sample_name)

    first, second = get_fastq_files(data)
    is_test_run = data.get("test_run", False)
    if is_test_run and bam.is_bam(first):
        first = bam.downsample(first, data, read_limit)
        second = None
    elif is_test_run:
        first, second = fastq.downsample(first, second, data, read_limit,
                                         quick=True)

    data["files"] = [first, second]
    return [[data]]
def process_lane(item):
    """Prepare a lane, cutting down the number of reads for a test run.

    Files are resolved through ``get_fastq_files``. For a test run
    (``item["test_run"]`` truthy) a BAM input is downsampled with
    ``bam.downsample`` and the second file is cleared; any other input is
    downsampled with ``fastq.downsample``.

    Returns ``[[item]]`` after storing the two resulting files as a list
    under ``item["files"]``.
    """
    max_reads = 10000
    lane_name = item["rgnames"]["lane"]
    logger.debug("Preparing %s" % lane_name)

    reads1, reads2 = get_fastq_files(item)
    if not item.get("test_run", False):
        # Regular run: keep the inputs untouched.
        pass
    elif bam.is_bam(reads1):
        reads1, reads2 = bam.downsample(reads1, item, max_reads), None
    else:
        reads1, reads2 = fastq.downsample(reads1, reads2, item, max_reads,
                                          quick=True)

    item["files"] = [reads1, reads2]
    return [[item]]