Esempio n. 1
0
def test_interleaved_pair_aligner_run():
    """Map interleaved reads, run the pair aligner on them, and count the output."""
    first_mate = files.open(testfiles["reads_1.fastq"])
    second_mate = files.open(testfiles["reads_2.fastq"])
    interleaved = filter.interleave([first_mate, second_mate])
    mappings = gem.mapper(interleaved, index)
    paired = gem.pairalign(mappings, index)
    assert paired is not None
    # test dataset does not pair at all
    record_count = sum(1 for _ in paired)
    assert record_count == 20000
Esempio n. 2
0
def test_interleaving():
    """Interleaving reads_1.fastq and reads_2.fastq must yield 20000 items."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    total = sum(1 for _ in filter.interleave([first, second]))
    assert total == 20000
Esempio n. 3
0
def test_interleaving():
    """Count the items produced by interleaving both FASTQ inputs (expects 20000)."""
    mate_1 = files.open(testfiles["reads_1.fastq"])
    mate_2 = files.open(testfiles["reads_2.fastq"])
    interleaved = filter.interleave([mate_1, mate_2])
    seen = sum(1 for _ in interleaved)
    assert seen == 20000
Esempio n. 4
0
def test_interleaved_pair_aligner_run():
    """Feed mapper output for interleaved reads into ``gem.pairalign`` and count it."""
    mate_1 = files.open(testfiles["reads_1.fastq"])
    mate_2 = files.open(testfiles["reads_2.fastq"])
    mapped = gem.mapper(filter.interleave([mate_1, mate_2]), index)
    paired = gem.pairalign(mapped, index)
    assert paired is not None
    # test dataset does not pair at all
    total = sum(1 for _ in paired)
    assert total == 20000
Esempio n. 5
0
def test_iterating_fastq():
    """Both the plain and the ``.gz`` FASTQ fixture must iterate to 10000 items."""
    for fixture in ("reads_1.fastq", "reads_1.fastq.gz"):
        reader = files.open(testfiles[fixture])
        assert reader is not None
        assert len(list(reader)) == 10000
Esempio n. 6
0
def test_file_merge_pairwise_same_content_big_uncompressed():
    """Merge an uncompressed map file with itself via ``gem.merger`` and count reads.

    The gzipped fixture is first decompressed into ``results_dir`` so that the
    merger is opened on a plain ``20t.map`` file. The merge is expected to
    yield 20000 reads.
    """
    import gzip
    import shutil

    uncompressed = results_dir + "/20t.map"
    # Decompress in-process rather than via a "cp ...; gunzip ..." shell string:
    # the shell call ignored its exit status (so a failed gunzip silently left
    # stale data in place) and interpolated unquoted paths into the shell.
    with gzip.open(testfiles["20t.map.gz"], "rb") as src, open(uncompressed, "wb") as dst:
        shutil.copyfileobj(src, dst)

    reads_1 = files.open(uncompressed)
    reads_2 = files.open(uncompressed)
    merged = gem.merger(reads_1, [reads_2]).merge(
        results_dir + "/merge_result.map", threads=8, same_content=True)
    num_reads = sum(1 for _ in merged)
    assert num_reads == 20000
Esempio n. 7
0
def test_file_merge_pairwise_same_content_big_sub():
    """Merge 20t.map.gz with 20t_sub.map.gz (``paired=True``) and expect 20000 reads."""
    primary = files.open(testfiles["20t.map.gz"])
    secondary = files.open(testfiles["20t_sub.map.gz"])
    out_path = results_dir + "/merge_result.map"
    merged = gem.merge(primary, [secondary], output=out_path, threads=8, paired=True)
    assert sum(1 for _ in merged) == 20000
Esempio n. 8
0
def test_file_merge_pairwise_same():
    """Merge test.map with a second handle on the same file and expect 10 reads."""
    primary = files.open(testfiles["test.map"])
    secondary = files.open(testfiles["test.map"])
    out_path = results_dir + "/merge_result.map"
    merged = gem.merge(primary, [secondary], output=out_path, threads=8, paired=True)
    assert sum(1 for _ in merged) == 10
def test_writing_interleaved_file():
    """Write the interleaved FASTQ inputs to a file and expect 80000 lines in it."""
    mate_1 = files.open(testfiles["reads_1.fastq"])
    mate_2 = files.open(testfiles["reads_2.fastq"])
    target = results_dir + "/write_interleaved.fastq"
    sink = gt.OutputFile(target)
    gt.interleave([mate_1, mate_2]).write_stream(sink, write_map=False)
    with open(target) as written:
        line_count = sum(1 for _ in written)
    assert line_count == 80000
Esempio n. 10
0
def test_interleaved_mapper_run():
    """Run the mapper on interleaved reads without giving an output path.

    Expects a non-None ``process``, a ``filename`` of None, and 20000 records.
    """
    mate_1 = files.open(testfiles["reads_1.fastq"])
    mate_2 = files.open(testfiles["reads_2.fastq"])
    interleaved = filter.interleave([mate_1, mate_2])

    mappings = gem.mapper(interleaved, index)
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename is None
    record_total = sum(1 for _ in mappings)
    assert record_total == 20000
Esempio n. 11
0
def test_interleaved_mapper_run():
    """Map the interleaved FASTQ pair and check process, filename and record count."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])

    mappings = gem.mapper(filter.interleave([first, second]), index)
    assert mappings is not None
    assert mappings.process is not None
    # No output path was passed, so no filename is expected on the result.
    assert mappings.filename is None
    mapped_total = sum(1 for _ in mappings)
    assert mapped_total == 20000
Esempio n. 12
0
def test_file_merge_pairwise_same_content_big_uncompressed():
    """Merge an uncompressed map file with itself via ``gem.merge`` and count reads.

    The gzipped fixture is first decompressed into ``results_dir`` so that the
    merge is fed a plain ``20t.map`` file. The merge is expected to yield
    20000 reads.
    """
    import gzip
    import shutil

    uncompressed = results_dir + "/20t.map"
    # Decompress in-process rather than via a "cp ...; gunzip ..." shell string:
    # the shell call ignored its exit status (so a failed gunzip silently left
    # stale data in place) and interpolated unquoted paths into the shell.
    with gzip.open(testfiles["20t.map.gz"], "rb") as src, open(uncompressed, "wb") as dst:
        shutil.copyfileobj(src, dst)

    reads_1 = files.open(uncompressed)
    reads_2 = files.open(uncompressed)
    merged = gem.merge(reads_1, [reads_2],
                       output=results_dir + "/merge_result.map",
                       threads=8, same_content=True)
    num_reads = sum(1 for _ in merged)
    assert num_reads == 20000
def test_writing_interleaved_file():
    """Stream interleaved reads into write_interleaved.fastq and count 80000 lines."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    target = results_dir + "/write_interleaved.fastq"
    out = gt.OutputFile(target)
    interleaved = gt.interleave([first, second])
    interleaved.write_stream(out, write_map=False)
    with open(target) as handle:
        written_lines = handle.readlines()
    assert len(written_lines) == 80000
Esempio n. 14
0
def test_writing_fastq_interleaved():
    """Write interleaved reads (``write_map=False``) and expect 80000 output lines."""
    output = results_dir + "/print_fastq.out"
    mate_1 = files.open(testfiles["reads_1.fastq"])
    mate_2 = files.open(testfiles["reads_2.fastq"])
    sink = gt.OutputFile(output)
    filter.interleave([mate_1, mate_2]).write_stream(sink, write_map=False)
    with open(output) as written:
        line_total = sum(1 for _ in written)
    # On failure the actual line count is reported as the assertion message.
    assert line_total == 80000, line_total
Esempio n. 15
0
def test_writing_fastq_interleaved():
    """Count the lines written for the interleaved FASTQ pair (expects 80000)."""
    output = results_dir + "/print_fastq.out"
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    interleaved = filter.interleave([first, second])
    interleaved.write_stream(gt.OutputFile(output), write_map=False)
    with open(output) as handle:
        n_lines = len(handle.readlines())
    assert n_lines == 80000, n_lines
Esempio n. 16
0
def test_cat_to_file():
    """Concatenate both FASTQ inputs into a file and check for 20000 distinct ids.

    Every fourth line of the output, starting with the first, is taken as an id.
    """
    target = results_dir + "/catted.fastq"
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    sink = gt.OutputFile(target)
    gt.cat([first, second]).write_stream(sink)
    with open(target) as written:
        unique_ids = set(
            line.strip()
            for line_no, line in enumerate(written)
            if line_no % 4 == 0
        )
    assert len(unique_ids) == 20000
Esempio n. 17
0
def test_cat_to_file():
    """Write the concatenation of both read files and expect 20000 unique header lines."""
    target = results_dir + "/catted.fastq"
    reads_a = files.open(testfiles["reads_1.fastq"])
    reads_b = files.open(testfiles["reads_2.fastq"])
    out = gt.OutputFile(target)
    gt.cat([reads_a, reads_b]).write_stream(out)
    with open(target) as handle:
        all_lines = handle.readlines()
    # Lines 0, 4, 8, ... are the ones collected as ids.
    header_ids = [line.strip() for line in all_lines[::4]]
    assert len(set(header_ids)) == 20000
Esempio n. 18
0
def test_async_splitmapper_execution():
    """Run the split mapper without an output path and expect 10000 records."""
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.splitmapper(reads, index)
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename is None
    record_total = sum(1 for _ in mappings)
    assert record_total == 10000
Esempio n. 19
0
def test_junction_extraction_from_splitmap():
    """Extract junctions from reads, merged with the GTF junctions, and expect 260.

    The junctions read from ``refseq.gtf`` are passed as ``merge_with`` to
    ``gem.extract_junctions``.
    """
    reads = files.open(testfiles["reads_1.fastq"])
    index = testfiles["genome.gem"]
    gtf_junctions = set(junctions.from_gtf(testfiles["refseq.gtf"]))
    jj = gem.extract_junctions(reads, index, merge_with=gtf_junctions)
    # Check the extraction result; the original asserted on the ``junctions``
    # module itself, which can never be None.
    assert jj is not None
    assert len(jj) == 260
Esempio n. 20
0
def test_async_splitmapper_execution():
    """Split-map reads_1.fastq with no output file; check process, filename, count."""
    fastq = files.open(testfiles["reads_1.fastq"])
    mappings = gem.splitmapper(fastq, index)
    assert mappings is not None
    assert mappings.process is not None
    # No output path was passed, so no filename is expected on the result.
    assert mappings.filename is None
    assert sum(1 for _ in mappings) == 10000
Esempio n. 21
0
def test_sam2bam_sort_sync_execution():
    """Convert reads_1.sam to a BAM file at a fixed path and expect 10000 records."""
    bam_path = results_dir + "/reads_1.bam"
    sam_reads = files.open(testfiles["reads_1.sam"])
    bam = gem.sam2bam(sam_reads, bam_path, True)
    assert bam is not None
    assert os.path.exists(bam_path)
    record_total = sum(1 for _ in bam)
    assert record_total == 10000
Esempio n. 22
0
def test_junction_extraction_from_splitmap():
    """Run ``gem.extract_junctions`` merged with refseq.gtf junctions; expect 260."""
    reads = files.open(testfiles["reads_1.fastq"])
    index = testfiles["genome.gem"]
    gtf_junctions = set(junctions.from_gtf(testfiles["refseq.gtf"]))
    jj = gem.extract_junctions(reads, index, merge_with=gtf_junctions)
    # Assert on the returned junctions, not on the ``junctions`` module, which
    # is always non-None and made the original check vacuous.
    assert jj is not None
    assert len(jj) == 260
Esempio n. 23
0
def test_sam2bam_sort_sync_execution():
    """Write reads_1.bam from the SAM fixture, check it exists, and count records."""
    result = results_dir + "/reads_1.bam"
    sam_input = files.open(testfiles["reads_1.sam"])
    bam = gem.sam2bam(sam_input, result, True)
    assert bam is not None
    assert os.path.exists(result)
    n_records = sum(1 for _ in bam)
    assert n_records == 10000
Esempio n. 24
0
def test_quality_pass_on_execution():
    """The mapper result for reads_1.fastq must report quality ``offset-33``."""
    reads = files.open(testfiles["reads_1.fastq"])
    out_path = results_dir + "/quality_passon_mapping.map"
    mappings = gem.mapper(reads, index, output=out_path)
    assert mappings.quality == "offset-33", (
        "Quality should be 'offset-33' but is %s" % (str(mappings.quality))
    )
Esempio n. 25
0
def test_sync_mapper_execution():
    """Map reads_1.fastq to a named output file and expect 10000 records."""
    reads = files.open(testfiles["reads_1.fastq"])
    out_path = results_dir + "/result.mapping"
    mappings = gem.mapper(reads, index, out_path)
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename is not None
    assert mappings.filename == out_path
    record_total = sum(1 for _ in mappings)
    assert record_total == 10000
Esempio n. 26
0
def test_sync_mapper_execution():
    """Run the mapper with an output path; the result must carry that filename."""
    fastq = files.open(testfiles["reads_1.fastq"])
    expected_file = results_dir + "/result.mapping"
    mappings = gem.mapper(fastq, index, expected_file)
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename is not None
    assert mappings.filename == expected_file
    assert sum(1 for _ in mappings) == 10000
def test_writing_input_file():
    """Write test_merge_target.map out with ``write_map=False``; expect 40 lines."""
    source = files.open(testfiles["test_merge_target.map"])
    target = results_dir + "/write_input.fastq"
    sink = gt.OutputFile(target)
    source.write_stream(sink, write_map=False)
    with open(target) as written:
        line_count = sum(1 for _ in written)
    assert line_count == 40
def test_writing_input_file():
    """Stream the merge-target map into write_input.fastq and count 40 lines."""
    map_input = files.open(testfiles["test_merge_target.map"])
    target = results_dir + "/write_input.fastq"
    map_input.write_stream(gt.OutputFile(target), write_map=False)
    with open(target) as handle:
        written_lines = handle.readlines()
    assert len(written_lines) == 40
Esempio n. 29
0
def test_writing_fastq():
    """Write reads_1.fastq back out (``write_map=False``) and expect 40000 lines."""
    output = results_dir + "/print_fastq.out"
    reads = files.open(testfiles["reads_1.fastq"])
    sink = gt.OutputFile(output)
    reads.write_stream(sink, write_map=False)
    with open(output) as written:
        line_total = sum(1 for _ in written)
    assert line_total == 40000
Esempio n. 30
0
def test_writing_fastq():
    """Count the lines produced when streaming reads_1.fastq to print_fastq.out."""
    output = results_dir + "/print_fastq.out"
    fastq = files.open(testfiles["reads_1.fastq"])
    fastq.write_stream(gt.OutputFile(output), write_map=False)
    with open(output) as handle:
        n_lines = len(handle.readlines())
    assert n_lines == 40000
Esempio n. 31
0
def test_sync_splitmapper_execution():
    """Split-map reads_1.fastq to a named output file and expect 10000 records."""
    gem.loglevel("debug")
    out_path = results_dir + "/splitmap_out.mapping"
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.splitmapper(reads, index, out_path)
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename == out_path
    assert os.path.exists(out_path)
    record_total = sum(1 for _ in mappings)
    assert record_total == 10000
Esempio n. 32
0
def test_sync_splitmapper_execution():
    """Run the split mapper with an output path; the file must exist afterwards."""
    gem.loglevel("debug")
    fastq = files.open(testfiles["reads_1.fastq"])
    mapping_file = results_dir + "/splitmap_out.mapping"
    mappings = gem.splitmapper(fastq, index, mapping_file)
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename == mapping_file
    assert os.path.exists(mapping_file)
    assert sum(1 for _ in mappings) == 10000
Esempio n. 33
0
def test_merging_maps():
    """Merge two source maps into the target map and spot-check three reads.

    For the three known tags, tab-separated fields 3 and 4 of ``to_map()``
    are compared against fixed values; 10 reads are expected in total.
    """
    target = files.open(testfiles["test_merge_target.map"])
    source_1 = files.open(testfiles["test_merge_source_1.map"])
    source_2 = files.open(testfiles["test_merge_source_2.map"])
    merger = gem.merge(target, [source_1, source_2])

    # tag -> (expected field 3, expected field 4)
    expected_by_tag = {
        "HWI-ST661:153:D0FTJACXX:2:1102:13866:124450": ("0+1", "chr2:-:162359617:74G"),
        "HWI-ST661:153:D0FTJACXX:2:1102:13753:124452": ("0+1", "chr9:+:38397301:54G20"),
        "HWI-ST661:153:D0FTJACXX:2:1102:14211:124259": ("1", "chr15:+:72492866:75"),
    }

    count = 0
    for read in merger:
        count += 1
        expected = expected_by_tag.get(read.tag)
        if expected is None:
            continue
        fields = read.to_map().split("\t")
        assert fields[3] == expected[0], read.to_map()
        assert fields[4] == expected[1]
    assert count == 10
Esempio n. 34
0
def test_merging_maps():
    """Check read count and selected map fields after merging three map files."""
    base = files.open(testfiles["test_merge_target.map"])
    extra_1 = files.open(testfiles["test_merge_source_1.map"])
    extra_2 = files.open(testfiles["test_merge_source_2.map"])
    merged = gem.merge(base, [extra_1, extra_2])

    # (tag, expected field 3, expected field 4) for the reads that are checked
    checks = (
        ("HWI-ST661:153:D0FTJACXX:2:1102:13866:124450", "0+1", "chr2:-:162359617:74G"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:13753:124452", "0+1", "chr9:+:38397301:54G20"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:14211:124259", "1", "chr15:+:72492866:75"),
    )

    count = 0
    for read in merged:
        count += 1
        for tag, want_3, want_4 in checks:
            if read.tag == tag:
                columns = read.to_map().split("\t")
                assert columns[3] == want_3, read.to_map()
                assert columns[4] == want_4
                break
    assert count == 10
Esempio n. 35
0
def test_gem2sam_execution():
    """Convert mapper output to SAM (``compact=True``) and expect 10000 records."""
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.mapper(reads, index)
    sam = gem.gem2sam(mappings, index, compact=True)
    assert sam is not None
    assert sam.process is not None
    assert sam.filename is None
    record_total = sum(1 for _ in sam)
    assert record_total == 10000
Esempio n. 36
0
def test_merging_maps_to_file():
    """Merge three map files into an output file and spot-check three reads.

    Uses ``gem.merge`` with ``output=``; 10 reads are expected and the output
    file must exist afterwards.
    """
    target = files.open(testfiles["test_merge_target.map"])
    source_1 = files.open(testfiles["test_merge_source_1.map"])
    source_2 = files.open(testfiles["test_merge_source_2.map"])
    result = results_dir + "/merged.mapping"
    merger = gem.merge(target, [source_1, source_2], output=result)

    # tag -> (expected field 3, expected field 4)
    expected_by_tag = {
        "HWI-ST661:153:D0FTJACXX:2:1102:13866:124450 1:N:0:GCCAAT": ("0+1", "chr2:-:162359617:74G"),
        "HWI-ST661:153:D0FTJACXX:2:1102:13753:124452 1:N:0:GCCAAT": ("0+1", "chr9:+:38397301:54G20"),
        "HWI-ST661:153:D0FTJACXX:2:1102:14211:124259 1:N:0:GCCAAT": ("1", "chr15:+:72492866:75"),
    }

    count = 0
    for read in merger:
        count += 1
        expected = expected_by_tag.get(read.tag)
        if expected is None:
            continue
        fields = read.to_map().split("\t")
        assert fields[3] == expected[0]
        assert fields[4] == expected[1]
    assert count == 10
    assert os.path.exists(result)
Esempio n. 37
0
def test_merging_maps_to_file():
    """Merge via ``gem.merger(...).merge(path)`` and verify reads and output file."""
    base = files.open(testfiles["test_merge_target.map"])
    extra_1 = files.open(testfiles["test_merge_source_1.map"])
    extra_2 = files.open(testfiles["test_merge_source_2.map"])
    result = results_dir + "/merged.mapping"
    merger = gem.merger(base, [extra_1, extra_2])

    # (tag, expected field 3, expected field 4) for the reads that are checked
    checks = (
        ("HWI-ST661:153:D0FTJACXX:2:1102:13866:124450 1:N:0:GCCAAT", "0+1", "chr2:-:162359617:74G"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:13753:124452 1:N:0:GCCAAT", "0+1", "chr9:+:38397301:54G20"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:14211:124259 1:N:0:GCCAAT", "1", "chr15:+:72492866:75"),
    )

    count = 0
    for read in merger.merge(result):
        count += 1
        for tag, want_3, want_4 in checks:
            if read.tag == tag:
                columns = read.to_map().split("\t")
                assert columns[3] == want_3
                assert columns[4] == want_4
                break
    assert count == 10
    assert os.path.exists(result)
Esempio n. 38
0
def test_gem2sam_execution():
    """Pipe the mapper result through ``gem.gem2sam`` and count 10000 SAM records."""
    fastq = files.open(testfiles["reads_1.fastq"])
    mapped = gem.mapper(fastq, index)
    sam = gem.gem2sam(mapped, index, compact=True)
    assert sam is not None
    assert sam.process is not None
    # No output path was passed, so no filename is expected on the result.
    assert sam.filename is None
    assert sum(1 for _ in sam) == 10000
Esempio n. 39
0
def test_merging_maps_chr21():
    """Merge the five chr21 mapping files; expect 2000 reads and 3590 maps in total."""
    fixture_names = (
        "chr21_mapping_initial.map",
        "chr21_mapping_denovo.map",
        "chr21_mapping_initial_split.map",
        "chr21_mapping_trim_20.map",
        "chr21_mapping_trim_20_split.map",
    )
    readers = [files.open(testfiles[name]) for name in fixture_names]
    # The first file is the merge target, the remaining ones are the sources.
    merged = gem.merge(readers[0], readers[1:])
    read_total = 0
    map_total = 0
    for read in merged:
        read_total += 1
        map_total += read.num_maps
    assert read_total == 2000, read_total
    assert map_total == 3590, map_total
Esempio n. 40
0
def test_merging_maps_chr21():
    """Iterate a ``gem.merger`` over the chr21 map files; expect 2000 reads, 3590 maps."""
    fixture_names = (
        "chr21_mapping_initial.map",
        "chr21_mapping_denovo.map",
        "chr21_mapping_initial_split.map",
        "chr21_mapping_trim_20.map",
        "chr21_mapping_trim_20_split.map",
    )
    opened = [files.open(testfiles[name]) for name in fixture_names]
    first, rest = opened[0], opened[1:]
    merger = gem.merger(first, rest)
    read_total = 0
    map_total = 0
    for read in merger:
        read_total += 1
        map_total += read.num_maps
    assert read_total == 2000, read_total
    assert map_total == 3590, map_total
Esempio n. 41
0
def test_fastq_filter_unmapped():
    """``filter.unmapped`` must pass through all 10000 reads of reads_1.fastq."""
    reads = files.open(testfiles["reads_1.fastq"])
    total_reads = sum(1 for _ in reads.clone())
    unmapped_reads = sum(1 for _ in filter.unmapped(reads.clone()))
    assert total_reads == unmapped_reads
    assert unmapped_reads == 10000
Esempio n. 42
0
def test_sam2bam_sort_async_execution():
    """Convert reads_1.sam with ``sorted=True`` and no output path; expect 10000 records."""
    sam_reads = files.open(testfiles["reads_1.sam"])
    bam = gem.sam2bam(sam_reads, sorted=True)
    assert bam is not None
    record_total = sum(1 for _ in bam)
    assert record_total == 10000
Esempio n. 43
0
def test_reader_cloning():
    """A clone taken after the reader was consumed must still yield 10000 items."""
    reader = files.open(testfiles["reads_1.fastq"])
    original_items = list(reader)
    assert len(original_items) == 10000
    cloned_items = list(reader.clone())
    assert len(cloned_items) == 10000
Esempio n. 44
0
def test_cat():
    """Concatenating both FASTQ inputs with ``filter.cat`` must yield 20000 reads."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    concatenated = filter.cat([first, second])
    assert sum(1 for _ in concatenated) == 20000
Esempio n. 45
0
def test_sam2bam_sort_async_execution():
    """Run ``gem.sam2bam`` on the SAM fixture with ``sorted=True`` and count the output."""
    sam_input = files.open(testfiles["reads_1.sam"])
    bam = gem.sam2bam(sam_input, sorted=True)
    assert bam is not None
    n_records = sum(1 for _ in bam)
    assert n_records == 10000
Esempio n. 46
0
def test_fastq_filtering():
    """reads_1.fastq must hold 10000 reads whose ``length`` values sum to 750000."""
    reads = files.open(testfiles["reads_1.fastq"])
    read_total = sum(1 for _ in reads.clone())
    length_total = sum(read.length for read in reads.clone())
    assert read_total == 10000, read_total
    assert length_total == 750000, length_total
Esempio n. 47
0
def test_file_merge_async():
    """Merge test.map with itself without an output path and expect 10 reads."""
    primary = files.open(testfiles["test.map"])
    secondary = files.open(testfiles["test.map"])
    merged = gem.merge(primary, [secondary])
    assert sum(1 for _ in merged) == 10
Esempio n. 48
0
def test_cat():
    """Count the reads produced by ``filter.cat`` over both FASTQ files (expects 20000)."""
    reads_a = files.open(testfiles["reads_1.fastq"])
    reads_b = files.open(testfiles["reads_2.fastq"])
    total = sum(1 for _ in filter.cat([reads_a, reads_b]))
    assert total == 20000
Esempio n. 49
0
def test_quality_pass_on_execution():
    """Map reads_1.fastq to a file and check the result's quality is ``offset-33``."""
    fastq = files.open(testfiles["reads_1.fastq"])
    mapping_file = results_dir + "/quality_passon_mapping.map"
    mappings = gem.mapper(fastq, index, output=mapping_file)
    assert mappings.quality == "offset-33", (
        "Quality should be 'offset-33' but is %s" % (str(mappings.quality))
    )
Esempio n. 50
0
def test_fastq_trim_both():
    """After ``filter.trim(reads, 10, 10)`` the read lengths must sum to 550000."""
    reads = files.open(testfiles["reads_1.fastq"])
    trimmed = filter.trim(reads, 10, 10)
    length_total = sum(read.length for read in trimmed)
    assert length_total == 550000, length_total
Esempio n. 51
0
def test_fastq_filter_interleave():
    """``filter.interleave`` over both FASTQ inputs must yield 20000 reads."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    interleaved = filter.interleave([first, second])
    assert sum(1 for _ in interleaved) == 20000