Esempio n. 1
0
def test_fasta__from_file(func, tmp_path):
    """Records written through `func` are read back by `FASTQ.from_file`.

    `func` is called as `func(path, "wt")` and must return a context manager
    yielding a writable handle; `tmp_path` is the directory for the file.
    """
    filename = tmp_path / "file"
    records = [
        FASTQ("This_is_FASTA!", None, "ACGTN", "12345"),
        FASTQ("This_is_ALSO_FASTA!", None, "CGTNA", "56789"),
    ]

    with func(filename, "wt") as handle:
        for record in records:
            record.write(handle)

    # Reading the file back must yield the records in the order written
    observed = list(FASTQ.from_file(filename))
    assert observed == records
Esempio n. 2
0
def main(argv):
    """Validate quality-score offsets of FASTQ files and print statistics.

    Every file in `args.files` is read in full; the offsets observed in each
    file are checked once that file has been read. On the first file that
    fails a check, a message is written to STDERR and 1 is returned without
    printing any statistics. Otherwise a JSON summary of the number of reads
    and nucleotides across all files is printed to STDOUT and 0 is returned.
    """
    args = parse_args(argv)

    total_reads = 0
    total_nts = 0

    for filename in args.files:
        qualities = FASTQualities()
        for record in FASTQ.from_file(filename):
            qualities.update(record)

            total_nts += len(record.sequence)
            total_reads += 1

        # Determine the error (if any) for this file; a file is accepted if
        # its offsets are ambiguous or match the expected offset, and empty
        # files are only rejected if 'no_empty' was requested.
        error = None
        offsets = qualities.offsets()
        if offsets == FASTQualities.BOTH:
            error = (
                "FASTQ file(s) contains quality scores with both quality offsets (33 "
                "and 64); file may be unexpected format or corrupt. Please ensure that "
                "this file contains valid FASTQ reads from a single source."
            )
        elif offsets == FASTQualities.MISSING:
            if args.no_empty:
                error = "FASTQ file is empty."
        elif offsets not in (FASTQualities.AMBIGIOUS, args.offset):
            error = (
                "FASTQ file contains quality scores with wrong quality score offset "
                "(%i); expected reads with quality score offset %i. Ensure that the "
                "'QualityOffset' specified in the makefile corresponds to the input."
                % (offsets, args.offset)
            )

        if error is not None:
            print(error, file=sys.stderr)

            return 1

    summary = {
        "filenames": args.files,
        "seq_retained_reads": total_reads,
        "seq_retained_nts": total_nts,
        # All retained reads are counted as collapsed when 'collapsed' is set
        "seq_collapsed": total_reads if args.collapsed else 0,
    }

    print(json.dumps(summary, indent=2, sort_keys=True))

    return 0
Esempio n. 3
0
def test_fasta__from_lines__multiple_records():
    """`FASTQ.from_lines` parses several consecutive four-line records.

    The second header carries text after the name ("XT:1:0"), which is
    expected to end up as the meta field of the parsed record.
    """
    first = ["@first\n", "TGTTCTCCACCGTGCACAAC\n", "+", "12345678901234567890\n"]
    second = ["@Second XT:1:0\n", "GAGAGCTCAGCTAAC\n", "+\n", "098765432109876\n"]
    third = ["@Third\n", "GGCATTCGGC\n", "+\n", "5678901234\n"]

    observed = list(FASTQ.from_lines(first + second + third))

    assert observed == [
        FASTQ("first", None, "TGTTCTCCACCGTGCACAAC", "12345678901234567890"),
        FASTQ("Second", "XT:1:0", "GAGAGCTCAGCTAAC", "098765432109876"),
        FASTQ("Third", None, "GGCATTCGGC", "5678901234"),
    ]
Esempio n. 4
0
def test_fasta__sorting_greater_equal():
    """Check `>` and `>=` between FASTQ records.

    Each record in `larger` differs from the reference in exactly one
    constructor argument, and is expected to sort after the reference.
    """
    reference = FASTQ("A", "B", "C", "D")
    larger = (
        FASTQ("B", "B", "C", "D"),
        FASTQ("A", "C", "C", "D"),
        FASTQ("A", "B", "D", "D"),
        FASTQ("A", "B", "C", "E"),
    )

    # An identical record is not strictly greater, but is greater-or-equal
    assert not FASTQ("A", "B", "C", "D") > reference
    assert FASTQ("A", "B", "C", "D") >= reference

    for record in larger:
        assert record > reference
        assert record >= reference
Esempio n. 5
0
def test_fasta__sorting_less_equal():
    """Check `<` and `<=` between FASTQ records.

    Each comparison varies exactly one constructor argument relative to
    FASTQ("A", "B", "C", "D"). The last assertion in each group varies the
    fourth argument (the qualities string), so that every constructor
    argument is covered by both operators.
    """
    # An identical record is never strictly less than itself
    assert not FASTQ("A", "B", "C", "D") < FASTQ("A", "B", "C", "D")
    assert FASTQ("A", "B", "C", "D") < FASTQ("B", "B", "C", "D")
    assert FASTQ("A", "B", "C", "D") < FASTQ("A", "C", "C", "D")
    assert FASTQ("A", "B", "C", "D") < FASTQ("A", "B", "D", "D")
    assert FASTQ("A", "B", "C", "D") < FASTQ("A", "B", "C", "E")
    assert FASTQ("A", "B", "C", "D") <= FASTQ("A", "B", "C", "D")
    assert FASTQ("A", "B", "C", "D") <= FASTQ("B", "B", "C", "D")
    assert FASTQ("A", "B", "C", "D") <= FASTQ("A", "C", "C", "D")
    assert FASTQ("A", "B", "C", "D") <= FASTQ("A", "B", "D", "D")
    assert FASTQ("A", "B", "C", "D") <= FASTQ("A", "B", "C", "E")
Esempio n. 6
0
def test_fasta__inequality():
    """Records differing in any single field compare unequal.

    One assertion per constructor argument: sequence, meta (None instead of
    a string), name, and qualities.
    """
    assert FASTQ("A", "B", "C", "D") != FASTQ("A", "B", "D", "D")
    assert FASTQ("A", "B", "C", "D") != FASTQ("A", None, "C", "D")
    assert FASTQ("A", "B", "C", "D") != FASTQ("D", "B", "C", "D")
    assert FASTQ("A", "B", "C", "D") != FASTQ("A", "B", "C", "E")
Esempio n. 7
0
def test_fastq__write():
    """`FASTQ.write` emits a four-line record with a bare "+" separator."""
    record = FASTQ("foobar", None, _SEQ_FRAG, _QUAL_FRAG)

    buffer = io.StringIO()
    record.write(buffer)
    observed = buffer.getvalue()

    assert observed == "@foobar\n%s\n+\n%s\n" % (_SEQ_FRAG, _QUAL_FRAG)
Esempio n. 8
0
def test_fastq__constructor__seq_must_be_string_type():
    """Passing a non-string sequence to the constructor raises FASTQError."""
    expected_message = "FASTQ sequence must be a string"

    with pytest.raises(FASTQError, match=expected_message):
        FASTQ("Seq1", None, 1, "1234")
Esempio n. 9
0
def test_fastq__constructor__name_must_be_non_empty():
    """Passing an empty name to the constructor raises FASTQError."""
    expected_message = "FASTQ name must be a non-empty string"

    with pytest.raises(FASTQError, match=expected_message):
        FASTQ("", None, "ACGT", "1234")
Esempio n. 10
0
def _simple_fastq_record():
    """Return a new four-base FASTQ record that includes meta information."""
    name, meta = "Dummy", "Meta-inf"
    sequence, qualities = "ACGT", "1234"

    return FASTQ(name, meta, sequence, qualities)
Esempio n. 11
0
def test_fasta__from_lines__invalid_header():
    """A header line starting with ">" instead of "@" raises FASTQError."""
    record_lines = [">fastq\n", "GGCATTCGGC\n", "+\n", "5678901234\n"]
    expected_message = "Invalid FASTQ header"

    with pytest.raises(FASTQError, match=expected_message):
        # from_lines is consumed via list() to force parsing of the input
        list(FASTQ.from_lines(record_lines))
Esempio n. 12
0
def test_fasta__from_lines__partial__3_lines():
    """A record lacking its qualities line raises FASTQError."""
    truncated = ["@fastq1\n", "ACGT\n", "+\n"]
    expected_message = "Partial FASTQ record"

    with pytest.raises(FASTQError, match=expected_message):
        list(FASTQ.from_lines(truncated))
Esempio n. 13
0
def test_fasta__from_lines_single_record():
    """`FASTQ.from_lines` parses a single record.

    The separator and qualities lines deliberately lack trailing newlines.
    """
    record_lines = ["@single\n", "CCTTCATCCA\n", "+", "1234567890"]

    observed = list(FASTQ.from_lines(record_lines))

    assert observed == [FASTQ("single", None, "CCTTCATCCA", "1234567890")]
Esempio n. 14
0
def test_fasta__from_lines__no_records__empty_line():
    """Input consisting of a single empty string yields no records."""
    observed = list(FASTQ.from_lines([""]))

    assert observed == []
Esempio n. 15
0
def test_fastq__write_with_meta_information():
    """`FASTQ.write` places the meta text after the name on the header line."""
    record = FASTQ("foobar", "my Meta-Info", _SEQ_FRAG, _QUAL_FRAG)

    buffer = io.StringIO()
    record.write(buffer)
    observed = buffer.getvalue()

    assert observed == "@foobar my Meta-Info\n%s\n+\n%s\n" % (_SEQ_FRAG, _QUAL_FRAG)
Esempio n. 16
0
def test_fasta__hash():
    """Equal records hash equally; changing any one field changes the hash."""
    reference = hash(FASTQ("A", "B", "C", "D"))

    assert hash(FASTQ("A", "B", "C", "D")) == reference

    # One variant per constructor argument, each differing in that argument only
    variants = (
        FASTQ("B", "B", "C", "D"),
        FASTQ("A", "C", "C", "D"),
        FASTQ("A", "B", "D", "D"),
        FASTQ("A", "B", "C", "E"),
    )
    for variant in variants:
        assert reference != hash(variant)
Esempio n. 17
0
def test_fasta__from_lines__mismatching_lengths():
    """A qualities line shorter than the sequence line raises FASTQError."""
    # 10 bases, but only 9 quality characters
    record_lines = ["@fastq\n", "GGCATTCGGC\n", "+\n", "567890123\n"]
    expected_message = "Sequence length does not match qualities length"

    with pytest.raises(FASTQError, match=expected_message):
        list(FASTQ.from_lines(record_lines))
Esempio n. 18
0
def test_fasta__unimplemented_comparison():
    """Comparison methods return NotImplemented for a non-FASTQ operand.

    The dunder methods are called directly, since the comparison operators
    would not hand the NotImplemented value back to the caller.
    """
    for method in ("__eq__", "__lt__", "__le__", "__ge__", "__gt__"):
        record = FASTQ("A", None, "C", "D")

        assert getattr(record, method)(10) is NotImplemented
Esempio n. 19
0
def test_fasta__from_lines__invalid_separator():
    """A third line of "?" instead of "+" raises FASTQError."""
    record_lines = ["@fastq\n", "GGCATTCGGC\n", "?\n", "5678901234\n"]
    expected_message = "Invalid FASTQ separator"

    with pytest.raises(FASTQError, match=expected_message):
        list(FASTQ.from_lines(record_lines))
Esempio n. 20
0
def test_fastq__constructor__no_meta():
    """A meta value of None is exposed as an empty string on the record."""
    record = FASTQ("Dummy", None, "ACGT", "1234")

    observed = (record.name, record.meta, record.sequence, record.qualities)

    assert observed == ("Dummy", "", "ACGT", "1234")
Esempio n. 21
0
def test_fastq__constructor__seq_and_qual_must_have_same_len():
    """Sequence and qualities of different lengths raise FASTQError."""
    expected_message = "FASTQ sequence and qualities length differ"

    with pytest.raises(FASTQError, match=expected_message):
        # 4 bases versus 5 quality characters
        FASTQ("Seq1", None, "ACGT", "12345")
Esempio n. 22
0
def test_fastq__constructor__meta_must_be_string_type_or_none():
    """A meta value that is neither a string nor None raises FASTQError."""
    expected_message = "FASTQ meta must be a string, or None"

    with pytest.raises(FASTQError, match=expected_message):
        FASTQ("Seq1", 1, "ACGT", "1234")
Esempio n. 23
0
        FASTQ("This_is_FASTA!", None, "ACGTN", "12345"),
        FASTQ("This_is_ALSO_FASTA!", None, "CGTNA", "56789"),
    ]

    with func(tmp_path / "file", "wt") as handle:
        for item in expected:
            item.write(handle)

    assert list(FASTQ.from_file(tmp_path / "file")) == expected


###############################################################################
###############################################################################
# Tests for 'FASTQualities'

# Shared records for the FASTQualities tests; they differ only in qualities.

# Lowest quality character is "!" (ASCII 33), highest is "I" (ASCII 73)
_33_READ = FASTQ("33", None, "ACGT", "!02I")
# Lowest quality character is "@" (ASCII 64), highest is "i" (ASCII 105)
# NOTE(review): the record name is "33" here too; looks like a copy-paste
# leftover, presumably harmless since only the qualities matter -- confirm.
_64_READ = FASTQ("33", None, "ACGT", "@UXi")
# Quality characters span ";" (ASCII 59) to "I" (ASCII 73); presumably meant
# to be compatible with either offset -- verify against FASTQualities.
_AMBIGIOUS_read = FASTQ("33", None, "ACGT", ";CDI")


def test_fastqualities__no_qualities():
    """A FASTQualities object that was never updated reports MISSING."""
    observed = FASTQualities().offsets()

    assert observed == FASTQualities.MISSING


def test_fastqualities__phred_33():
    """Updating with the offset-33 read alone yields OFFSET_33."""
    tracker = FASTQualities()
    tracker.update(_33_READ)
    observed = tracker.offsets()

    assert observed == FASTQualities.OFFSET_33
Esempio n. 24
0
def test_fastq__constructor__qual_must_be_string_type():
    """Passing non-string qualities to the constructor raises FASTQError."""
    expected_message = "FASTQ qualities must be a string"

    with pytest.raises(FASTQError, match=expected_message):
        FASTQ("Seq1", None, "ACGT", 1234)
Esempio n. 25
0
def test_fastq__repr_with_meta_information():
    """`repr` of a record shows name, meta, sequence and qualities in order."""
    record = FASTQ("foobar", "my Meta-Info", _SEQ_FRAG, _QUAL_FRAG)
    template = "FASTQ('foobar', 'my Meta-Info', %r, %r)"

    assert repr(record) == template % (_SEQ_FRAG, _QUAL_FRAG)
Esempio n. 26
0
def test_fastq__repr():
    """`repr` of a record created with meta=None shows meta as ''."""
    record = FASTQ("foobar", None, _SEQ_FRAG, _QUAL_FRAG)
    template = "FASTQ('foobar', '', %r, %r)"

    assert repr(record) == template % (_SEQ_FRAG, _QUAL_FRAG)
Esempio n. 27
0
def test_fasta__equality():
    """Check equality between FASTQ records built from the same values."""
    assert FASTQ("A", "B", "C", "D") == FASTQ("A", "B", "C", "D")