Exemple #1
0
def test_fastq_unzipped():
    """Exercise the core FastQ API on both the ``data`` and ``datagz`` inputs.

    Despite its name, the test loops over both module-level fixtures and
    checks the detected data format, line/read counting, head extraction
    (to a default and to a ``.gz``-suffixed output), the context-manager
    protocol and random read selection.
    """
    for thisdata in [data, datagz]:
        # instantiation
        f = fastq.FastQ(thisdata)
        assert f.data_format == "Illumina_1.8+"

        # Count lines and reads. Each counter is run twice on purpose: we
        # want to make sure that re-running it (decompression with zlib)
        # still works the second time.
        assert f.count_lines() == 1000
        assert f.count_lines() == 1000
        assert f.count_reads() == 250
        assert f.count_reads() == 250

        # Extract the head of the file into an unzipped file. The ``with``
        # form is used so the temporary file is released even when the
        # assertion below fails.
        with TempFile() as ft:
            f.extract_head(100, ft.name)
            fcheck = fastq.FastQ(ft.name)
            assert fcheck.count_lines() == 100

        # extract head of the file and zip output
        with TempFile(suffix=".gz") as ft:
            f.extract_head(100, ft.name)
            fcheck = fastq.FastQ(ft.name)
            assert fcheck.count_lines() == 100

        # FastQ can itself be used as a context manager and supports len()
        with FastQ(thisdata) as ff:
            assert len(ff) == 250

        # select_random_reads accepts a count, and also accepts the value
        # it returned from a previous call
        with TempFile() as fh:
            selection = f.select_random_reads(10, fh.name)
            f.select_random_reads(selection, fh.name)
Exemple #2
0
def test_split():
    """Check ``FastQ.split_lines`` on the ``data`` and ``datagz`` inputs.

    Covers the rejection of a chunk size that is not a multiple of 4, the
    number of output files for two chunk sizes, and every combination of
    input file and ``gzip`` flag.
    """
    # general tests
    f = fastq.FastQ(data)

    # 250 is not a multiple of 4, so the call is expected to raise. The
    # failure is signalled from the ``else`` branch: raising it inside the
    # ``try`` body would be swallowed by the handler and the check could
    # never fail.
    try:
        f.split_lines(250)
    except Exception:
        pass
    else:
        raise AssertionError(
            "split_lines(250) should fail: 250 is not a multiple of 4")

    # number of chunks produced for two different chunk sizes
    outputs = f.split_lines(500)
    assert len(outputs) == 2
    remove_files(outputs)
    outputs = f.split_lines(256)
    assert len(outputs) == 4
    remove_files(outputs)

    # Now tests the zip/unzip cases
    f = fastq.FastQ(data)
    outputs = f.split_lines(500, gzip=False)
    remove_files(outputs)
    outputs = f.split_lines(500, gzip=True)
    remove_files(outputs)

    f = fastq.FastQ(datagz)
    outputs = f.split_lines(500, gzip=False)
    remove_files(outputs)
    outputs = f.split_lines(500, gzip=True)
    remove_files(outputs)

    # Chunk size larger than the file ("too many").
    # NOTE(review): the original compared the result to None without
    # asserting it; confirm the intended return value before adding an
    # assertion here.
    f.split_lines(1000000)
Exemple #3
0
def test_basic():
    """Verify the summary statistics reported for the ``data`` input."""
    expected_stats = {
        'sum_read_length': 25250,
        'mean_read_length': 101.0,
        'N': 250,
    }
    reader = fastq.FastQ(data)
    assert reader.stats() == expected_stats
Exemple #4
0
def test_others():
    """Smoke-test the k-mer and krona exports and FastQ (in)equality."""
    reader = fastq.FastQ(data)

    # kmer
    reader.to_kmer_content()

    # krona
    with TempFile() as tmp:
        reader.to_krona(5, tmp.name)

    # an instance compares equal to itself
    assert reader == reader

    # two readers built from different input files compare unequal
    left = fastq.FastQ(data)
    right = fastq.FastQ(sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz"))
    assert left != right

    # two readers built from the same input file compare equal
    left = fastq.FastQ(data)
    right = fastq.FastQ(data)
    assert left == right
Exemple #5
0
def test_basic():
    """Check format detection, summary statistics and FASTA export.

    Runs ``is_fastq`` on the ``data`` input, compares ``FastQ.stats()``
    with the expected values, and calls ``get_lengths`` and ``to_fasta``
    as smoke tests (their results are not inspected).
    """
    assert is_fastq(data)

    f = fastq.FastQ(data)
    assert f.stats() == {
        'N': 250,
        'mean_read_length': 101.0,
        'sum_read_length': 25250
    }
    f.get_lengths()

    # Use TempFile as a context manager so the temporary output is released
    # when the block exits, even if to_fasta raises.
    with TempFile() as ft:
        f.to_fasta(ft.name)
Exemple #6
0
def test_identifiers():
    """Check ``fastq.Identifier`` version handling on several headers."""
    reader = fastq.FastQ(data)

    # identifier of the first read, no version given
    first_read = reader.next()
    ident = fastq.Identifier(first_read["identifier"])
    assert ident.version == 'Illumina_1.8+'

    # identifier of the next read, version given positionally
    second_read = reader.next()
    ident = fastq.Identifier(second_read["identifier"], "Illumina_1.8+")
    assert ident.version == 'Illumina_1.8+'

    # a header for which the reported version is "unknown"
    ident = fastq.Identifier("@prefix:1_13_573/1")
    assert ident.version == "unknown"

    # same header built without, then with, an explicit version keyword
    ident = fastq.Identifier("@SEQ:1:1101:9010:3891#0/1")
    ident = fastq.Identifier("@SEQ:1:1101:9010:3891#0/1",
                             version="Illumina_1.4+")

    # exercise the string representations; the output is not checked
    print(ident)
    ident.__repr__()
Exemple #7
0
def test_filter():
    """Check ``FastQ.filter`` with a length window that keeps all reads
    and with one that keeps none; the source must report 250 reads in
    both cases."""
    reader = fastq.FastQ(data)

    # keeps all
    with TempFile() as tmp:
        reader.filter(
            min_bp=80,
            max_bp=120,
            output_filename=tmp.name,
            progressbar=False,
        )
        assert len(reader) == 250
        kept = FastQ(tmp.name)
        assert len(kept) == 250

    # keeps nothing
    with TempFile() as tmp:
        reader.filter(min_bp=80, max_bp=90, output_filename=tmp.name)
        assert len(reader) == 250
        kept = FastQ(tmp.name)
        assert len(kept) == 0
Exemple #8
0
def test_select_reads():
    """Smoke-test ``FastQ.select_reads`` with a single read identifier.

    The selected read is written to a temporary file; the output is not
    inspected.
    """
    f = fastq.FastQ(data)
    # Use TempFile as a context manager so the temporary output is released
    # when the block exits, even if select_reads raises.
    with TempFile() as ft:
        f.select_reads(["HISEQ:426:C5T65ACXX:5:2302:4953:2090"],
                       output_filename=ft.name)