def test_demultiplex_gz(tmp_dir, file_format):
    """
    Run :py:func:`riboviz.demultiplex_fastq.demultiplex` on a
    GZIP-compressed copy of the simulated multiplexed FASTQ file and
    compare its outputs with the simulated demultiplexed data.

    ``file_format`` is a pair: the file name format of a GZIPped
    FASTQ file followed by the file name format of the corresponding
    non-GZIP FASTQ file.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    :param file_format: File name formats (GZIP, non-GZIP)
    :type file_format: tuple(str or unicode, str or unicode)
    """
    gz_fmt, fmt = file_format
    multiplexed_gz = os.path.join(tmp_dir, gz_fmt.format("test_multiplex"))
    sim_dir = riboviz.test.SIMDATA_DIR

    # Create the GZIPped input from the uncompressed simulated data.
    with open(os.path.join(sim_dir, "multiplex.fastq"), "rb") as source, \
            gzip.open(multiplexed_gz, "wb") as sink:
        shutil.copyfileobj(source, sink)

    sample_sheet = os.path.join(sim_dir, "multiplex_barcodes.tsv")
    demultiplex_fastq.demultiplex(sample_sheet,
                                  multiplexed_gz,
                                  mismatches=2,
                                  out_dir=tmp_dir)

    # Directory holding the expected demultiplexed outputs.
    deplex_dir = os.path.join(sim_dir, "deplex")
    num_reads_name = demultiplex_fastq.NUM_READS_FILE
    utils.equal_tsv(os.path.join(deplex_dir, num_reads_name),
                    os.path.join(tmp_dir, num_reads_name))

    # Output files carry the lower-case form of the multiplexed
    # file's extension.
    gz_fmt_lower = gz_fmt.lower()
    for tag in ("Tag0", "Tag1", "Tag2", "Unassigned"):
        compressed = os.path.join(tmp_dir, gz_fmt_lower.format(tag))
        decompressed = os.path.join(tmp_dir, fmt.format(tag))
        # The simulated (expected) data always uses a .fastq
        # extension.
        expected = os.path.join(deplex_dir,
                                fastq.FASTQ_FORMAT.format(tag))
        # Expand the GZIPped output so it can be compared against
        # the uncompressed expected file.
        with gzip.open(compressed, "rb") as source, \
                open(decompressed, "wb") as sink:
            shutil.copyfileobj(source, sink)
        fastq.equal_fastq(expected, decompressed)

    # By construction of the simulated data Tag3 never matches, since
    # Tag0|1|2 claim any matching barcodes first, so no Tag3 output
    # file should have been written.
    tag3_file = os.path.join(tmp_dir, gz_fmt_lower.format("Tag3"))
    assert not os.path.exists(tag3_file)
def test_demultiplex_output_error():
    """
    Check that :py:func:`riboviz.demultiplex_fastq.demultiplex`
    raises ``IOError`` when the output directory cannot be created.
    """
    sim_dir = riboviz.test.SIMDATA_DIR
    sample_sheet = os.path.join(sim_dir, "multiplex_barcodes.tsv")
    multiplexed = os.path.join(sim_dir, "multiplex.fastq")
    with pytest.raises(IOError):
        # An existing regular file (the sample sheet itself) is given
        # as out_dir so that the directory cannot be created.
        demultiplex_fastq.demultiplex(sample_sheet,
                                      multiplexed,
                                      out_dir=sample_sheet)
def test_demultiplex_no_read1_file(tmp_dir):
    """
    Check that :py:func:`riboviz.demultiplex_fastq.demultiplex`
    raises ``FileNotFoundError`` when the FASTQ file does not exist.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    """
    sample_sheet = os.path.join(riboviz.test.SIMDATA_DIR,
                                "multiplex_barcodes.tsv")
    with pytest.raises(FileNotFoundError):
        demultiplex_fastq.demultiplex(sample_sheet,
                                      "nosuchfile.fastq",
                                      out_dir=tmp_dir)
def test_demultiplex_no_sample_sheet(tmp_dir):
    """
    Check that :py:func:`riboviz.demultiplex_fastq.demultiplex`
    raises ``FileNotFoundError`` when the sample sheet does not exist.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    """
    multiplexed = os.path.join(riboviz.test.SIMDATA_DIR,
                               "multiplex.fastq")
    with pytest.raises(FileNotFoundError):
        demultiplex_fastq.demultiplex("nosuchfile.tsv",
                                      multiplexed,
                                      out_dir=tmp_dir)
def test_demultiplex(tmp_dir, file_format):
    """
    Run :py:func:`riboviz.demultiplex_fastq.demultiplex` on a copy of
    the simulated multiplexed FASTQ file and compare its outputs with
    the simulated demultiplexed data.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    :param file_format: FASTQ file format
    :type file_format: str or unicode
    """
    multiplexed = os.path.join(tmp_dir,
                               file_format.format("test_multiplex"))
    sim_dir = riboviz.test.SIMDATA_DIR
    # Copy the simulated input under a name following file_format.
    shutil.copyfile(os.path.join(sim_dir, "multiplex.fastq"), multiplexed)

    sample_sheet = os.path.join(sim_dir, "multiplex_barcodes.tsv")
    demultiplex_fastq.demultiplex(sample_sheet,
                                  multiplexed,
                                  mismatches=2,
                                  out_dir=tmp_dir)

    # Directory holding the expected demultiplexed outputs.
    deplex_dir = os.path.join(sim_dir, "deplex")
    num_reads_name = demultiplex_fastq.NUM_READS_FILE
    utils.equal_tsv(os.path.join(deplex_dir, num_reads_name),
                    os.path.join(tmp_dir, num_reads_name))

    # Output files carry the lower-case form of the multiplexed
    # file's extension.
    out_fmt = file_format.lower()
    for tag in ("Tag0", "Tag1", "Tag2", "Unassigned"):
        actual = os.path.join(tmp_dir, out_fmt.format(tag))
        # The simulated (expected) data always uses a .fastq
        # extension.
        expected = os.path.join(deplex_dir,
                                fastq.FASTQ_FORMAT.format(tag))
        fastq.equal_fastq(expected, actual)

    # By construction of the simulated data Tag3 never matches, since
    # Tag0|1|2 claim any matching barcodes first, so no Tag3 output
    # file should have been written.
    tag3_file = os.path.join(tmp_dir, out_fmt.format("Tag3"))
    assert not os.path.exists(tag3_file)
# Example #6
# 0
def invoke_demultiplex_fastq():
    """
    Print provenance information for this file, parse the
    command-line options, then pass them on to
    :py:func:`riboviz.demultiplex_fastq.demultiplex`.
    """
    print(provenance.write_provenance_to_str(__file__))
    opts = parse_command_line_options()
    # Arguments are positional, so their order must follow the
    # signature of demultiplex.
    demultiplex_fastq.demultiplex(
        opts.sample_sheet_file,
        opts.read1_file,
        opts.read2_file,
        opts.mismatches,
        opts.out_dir,
        opts.delimiter)