def test_demultiplex_gz(tmp_dir, file_format):
    """
    Test :py:func:`riboviz.demultiplex_fastq.demultiplex` using
    GZIPped FASTQ files.

    Each ``file_format`` is a pair: a FASTQ GZIP file name format
    and the corresponding non-GZIP FASTQ file name format.

    The simulated multiplexed FASTQ file is GZIPped into ``tmp_dir``,
    demultiplexed, and each GZIPped output file is decompressed
    before being compared to the expected simulated data.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    :param file_format: File name format
    :type file_format: tuple(str or unicode, str or unicode)
    """
    gz_fmt, fmt = file_format
    sim_dir = riboviz.test.SIMDATA_DIR
    expected_dir = os.path.join(sim_dir, "deplex")
    # Output files use the lower-case version of the multiplexed
    # file's extension.
    out_gz_fmt = gz_fmt.lower()

    # Create a GZIPped copy of the simulated multiplexed file.
    multiplexed_gz = os.path.join(tmp_dir, gz_fmt.format("test_multiplex"))
    with open(os.path.join(sim_dir, "multiplex.fastq"), "rb") as source, \
            gzip.open(multiplexed_gz, "wb") as sink:
        shutil.copyfileobj(source, sink)

    demultiplex_fastq.demultiplex(
        os.path.join(sim_dir, "multiplex_barcodes.tsv"),
        multiplexed_gz,
        mismatches=2,
        out_dir=tmp_dir)

    # Compare the per-tag read counts.
    utils.equal_tsv(
        os.path.join(expected_dir, demultiplex_fastq.NUM_READS_FILE),
        os.path.join(tmp_dir, demultiplex_fastq.NUM_READS_FILE))

    for tag in ["Tag0", "Tag1", "Tag2", "Unassigned"]:
        gzipped_output = os.path.join(tmp_dir, out_gz_fmt.format(tag))
        unzipped_output = os.path.join(tmp_dir, fmt.format(tag))
        # Simulated data always has a .fastq extension.
        expected_output = os.path.join(expected_dir,
                                       fastq.FASTQ_FORMAT.format(tag))
        # Decompress the output so it can be compared to the
        # uncompressed expected file.
        with gzip.open(gzipped_output, "rb") as source, \
                open(unzipped_output, "wb") as sink:
            shutil.copyfileobj(source, sink)
        fastq.equal_fastq(expected_output, unzipped_output)

    # The definition of the simulated data means that Tag3 has no
    # matches, as Tag0|1|2 will match any barcodes first. Check
    # there is no Tag3-related output file.
    tag3_output = os.path.join(tmp_dir, out_gz_fmt.format("Tag3"))
    assert not os.path.exists(tag3_output)
def test_demultiplex_output_error():
    """
    Test :py:func:`riboviz.demultiplex_fastq.demultiplex` raises
    ``IOError`` if the output directory cannot be created.

    The path of an existing file (the sample sheet) is given as the
    output directory.
    """
    sample_sheet = os.path.join(riboviz.test.SIMDATA_DIR,
                                "multiplex_barcodes.tsv")
    multiplexed = os.path.join(riboviz.test.SIMDATA_DIR,
                               "multiplex.fastq")
    with pytest.raises(IOError):
        # Pass existing file as out_dir value.
        demultiplex_fastq.demultiplex(sample_sheet,
                                      multiplexed,
                                      out_dir=sample_sheet)
def test_demultiplex_no_read1_file(tmp_dir):
    """
    Test :py:func:`riboviz.demultiplex_fastq.demultiplex` raises
    ``FileNotFoundError`` if the FASTQ file is not found.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    """
    sample_sheet = os.path.join(riboviz.test.SIMDATA_DIR,
                                "multiplex_barcodes.tsv")
    with pytest.raises(FileNotFoundError):
        demultiplex_fastq.demultiplex(sample_sheet,
                                      "nosuchfile.fastq",
                                      out_dir=tmp_dir)
def test_demultiplex_no_sample_sheet(tmp_dir):
    """
    Test :py:func:`riboviz.demultiplex_fastq.demultiplex` raises
    ``FileNotFoundError`` if the sample sheet is not found.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    """
    multiplexed = os.path.join(riboviz.test.SIMDATA_DIR,
                               "multiplex.fastq")
    with pytest.raises(FileNotFoundError):
        demultiplex_fastq.demultiplex("nosuchfile.tsv",
                                      multiplexed,
                                      out_dir=tmp_dir)
def test_demultiplex(tmp_dir, file_format):
    """
    Test :py:func:`riboviz.demultiplex_fastq.demultiplex`.

    The simulated multiplexed FASTQ file is copied into ``tmp_dir``
    under a name built from ``file_format``, demultiplexed, and the
    output files are compared to the expected simulated data.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    :param file_format: FASTQ file format
    :type file_format: str or unicode
    """
    sim_dir = riboviz.test.SIMDATA_DIR
    expected_dir = os.path.join(sim_dir, "deplex")
    # Output files use the lower-case version of the multiplexed
    # file's extension.
    out_fmt = file_format.lower()

    multiplexed = os.path.join(tmp_dir, file_format.format("test_multiplex"))
    shutil.copyfile(os.path.join(sim_dir, "multiplex.fastq"), multiplexed)

    demultiplex_fastq.demultiplex(
        os.path.join(sim_dir, "multiplex_barcodes.tsv"),
        multiplexed,
        mismatches=2,
        out_dir=tmp_dir)

    # Compare the per-tag read counts.
    utils.equal_tsv(
        os.path.join(expected_dir, demultiplex_fastq.NUM_READS_FILE),
        os.path.join(tmp_dir, demultiplex_fastq.NUM_READS_FILE))

    for tag in ["Tag0", "Tag1", "Tag2", "Unassigned"]:
        produced = os.path.join(tmp_dir, out_fmt.format(tag))
        # Simulated data always has a .fastq extension.
        expected = os.path.join(expected_dir,
                                fastq.FASTQ_FORMAT.format(tag))
        fastq.equal_fastq(expected, produced)

    # The definition of the simulated data means that Tag3 has no
    # matches, as Tag0|1|2 will match any barcodes first. Check
    # there is no Tag3-related output file.
    tag3_output = os.path.join(tmp_dir, out_fmt.format("Tag3"))
    assert not os.path.exists(tag3_output)
def invoke_demultiplex_fastq():
    """
    Parse command-line options then invoke
    :py:func:`riboviz.demultiplex_fastq.demultiplex`.

    The provenance string for this file is printed before the
    options are parsed.
    """
    print(provenance.write_provenance_to_str(__file__))
    opts = parse_command_line_options()
    # Arguments are passed positionally, in this order.
    demultiplex_fastq.demultiplex(
        opts.sample_sheet_file,
        opts.read1_file,
        opts.read2_file,
        opts.mismatches,
        opts.out_dir,
        opts.delimiter)