def test_interleaved_pair_aligner_run():
    """Pair-align mappings produced from two interleaved FASTQ inputs."""
    reads_one = files.open(testfiles["reads_1.fastq"])
    reads_two = files.open(testfiles["reads_2.fastq"])
    interleaved = filter.interleave([reads_one, reads_two])
    mapped = gem.mapper(interleaved, index)
    paired = gem.pairalign(mapped, index)
    assert paired is not None
    # test dataset does not pair at all
    assert sum(1 for _ in paired) == 20000
def test_interleaving():
    """Interleaving two 10k-read FASTQ files yields 20k templates."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    total = sum(1 for _ in filter.interleave([first, second]))
    assert total == 20000
def test_iterating_fastq():
    """Both the plain and the gzipped FASTQ readers iterate 10k reads."""
    plain = files.open(testfiles["reads_1.fastq"])
    assert plain is not None
    assert len(list(plain)) == 10000
    gzipped = files.open(testfiles["reads_1.fastq.gz"])
    assert gzipped is not None
    assert len(list(gzipped)) == 10000
def test_file_merge_pairwise_same_content_big_uncompressed():
    """Merge an uncompressed map file with itself via gem.merger()."""
    # NOTE(review): a later test in this file reuses this function name and
    # shadows this definition — consider renaming so both actually run (F811).
    subprocess.call(
        "cp %s %s; gunzip %s;" % (
            testfiles["20t.map.gz"], results_dir,
            results_dir + "/20t.map.gz"),
        shell=True)
    left = files.open(results_dir + "/20t.map")
    right = files.open(results_dir + "/20t.map")
    merged = gem.merger(left, [right]).merge(
        results_dir + "/merge_result.map", threads=8, same_content=True)
    assert sum(1 for _ in merged) == 20000
def test_file_merge_pairwise_same_content_big_sub():
    """Paired merge of a full map file with a subset keeps 20k reads."""
    primary = files.open(testfiles["20t.map.gz"])
    secondary = files.open(testfiles["20t_sub.map.gz"])
    merged = gem.merge(
        primary, [secondary],
        output=results_dir + "/merge_result.map",
        threads=8, paired=True)
    assert sum(1 for _ in merged) == 20000
def test_file_merge_pairwise_same():
    """Paired merge of a small map file with itself keeps all 10 reads."""
    left = files.open(testfiles["test.map"])
    right = files.open(testfiles["test.map"])
    merged = gem.merge(
        left, [right],
        output=results_dir + "/merge_result.map",
        threads=8, paired=True)
    assert sum(1 for _ in merged) == 10
def test_writing_interleaved_file():
    """Writing two interleaved FASTQ inputs produces 80k output lines."""
    left = files.open(testfiles["reads_1.fastq"])
    right = files.open(testfiles["reads_2.fastq"])
    target = results_dir + "/write_interleaved.fastq"
    stream = gt.OutputFile(target)
    gt.interleave([left, right]).write_stream(stream, write_map=False)
    with open(target) as handle:
        assert len(handle.readlines()) == 80000
def test_interleaved_mapper_run():
    """Async mapper over interleaved input exposes a process, no file."""
    reads_one = files.open(testfiles["reads_1.fastq"])
    reads_two = files.open(testfiles["reads_2.fastq"])
    mappings = gem.mapper(filter.interleave([reads_one, reads_two]), index)
    assert mappings is not None
    assert mappings.process is not None
    # no output path was given, so the result is streamed, not a file
    assert mappings.filename is None
    assert sum(1 for _ in mappings) == 20000
def test_file_merge_pairwise_same_content_big_uncompressed():
    """Merge an uncompressed map file with itself via gem.merge()."""
    subprocess.call(
        "cp %s %s; gunzip %s;" % (
            testfiles["20t.map.gz"], results_dir,
            results_dir + "/20t.map.gz"),
        shell=True)
    left = files.open(results_dir + "/20t.map")
    right = files.open(results_dir + "/20t.map")
    merged = gem.merge(
        left, [right],
        output=results_dir + "/merge_result.map",
        threads=8, same_content=True)
    assert sum(1 for _ in merged) == 20000
def test_writing_fastq_interleaved():
    """filter.interleave().write_stream() emits 80k FASTQ lines."""
    destination = results_dir + "/print_fastq.out"
    reads_one = files.open(testfiles["reads_1.fastq"])
    reads_two = files.open(testfiles["reads_2.fastq"])
    filter.interleave([reads_one, reads_two]).write_stream(
        gt.OutputFile(destination), write_map=False)
    with open(destination) as handle:
        line_count = sum(1 for _ in handle)
    assert line_count == 80000, line_count
def test_cat_to_file():
    """Concatenating two FASTQ files yields 20k distinct read ids."""
    target = results_dir + "/catted.fastq"
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    stream = gt.OutputFile(target)
    gt.cat([first, second]).write_stream(stream)
    ids = []
    with open(target) as handle:
        # FASTQ records are 4 lines; the id line is the first of each record
        for lineno, line in enumerate(handle):
            if lineno % 4 == 0:
                ids.append(line.strip())
    assert len(set(ids)) == 20000
def test_async_splitmapper_execution():
    """Async split-mapper exposes a process, streams 10k mappings."""
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.splitmapper(reads, index)
    assert mappings is not None
    assert mappings.process is not None
    # no output path was given, so nothing is written to disk
    assert mappings.filename is None
    assert sum(1 for _ in mappings) == 10000
def test_junction_extraction_from_splitmap():
    """Extract junctions from a split-map run, merged with GTF junctions."""
    input = files.open(testfiles["reads_1.fastq"])
    index = testfiles["genome.gem"]
    gtf_junctions = set(junctions.from_gtf(testfiles["refseq.gtf"]))
    jj = gem.extract_junctions(input, index, merge_with=gtf_junctions)
    # BUG FIX: the original asserted `junctions is not None`, which checks
    # the imported module (always truthy) rather than the extraction result.
    assert jj is not None
    assert len(jj) == 260
def test_sam2bam_sort_sync_execution():
    """Synchronous SAM→BAM conversion writes the file and keeps 10k reads."""
    result = results_dir + "/reads_1.bam"
    sam_reads = files.open(testfiles["reads_1.sam"])
    bam = gem.sam2bam(sam_reads, result, True)
    assert bam is not None
    assert os.path.exists(result)
    assert sum(1 for _ in bam) == 10000
def test_quality_pass_on_execution():
    """FASTQ quality offset is propagated onto the mapper output."""
    # NOTE(review): a later test in this file reuses this function name and
    # shadows this definition — consider renaming so both actually run (F811).
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.mapper(
        reads, index, output=results_dir + "/quality_passon_mapping.map")
    assert mappings.quality == "offset-33", \
        "Quality should be 'offset-33' but is %s" % (str(mappings.quality))
def test_sync_mapper_execution():
    """Mapper run with an output path records the filename and 10k reads."""
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.mapper(reads, index, results_dir + "/result.mapping")
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename is not None
    assert mappings.filename == results_dir + "/result.mapping"
    assert sum(1 for _ in mappings) == 10000
def test_writing_input_file():
    """Writing a 10-read map file as FASTQ produces 40 lines."""
    source = files.open(testfiles["test_merge_target.map"])
    target = results_dir + "/write_input.fastq"
    stream = gt.OutputFile(target)
    source.write_stream(stream, write_map=False)
    with open(target) as handle:
        assert len(handle.readlines()) == 40
def test_writing_fastq():
    """Writing 10k reads as FASTQ produces 40k lines (4 per record)."""
    destination = results_dir + "/print_fastq.out"
    reads = files.open(testfiles["reads_1.fastq"])
    reads.write_stream(gt.OutputFile(destination), write_map=False)
    with open(destination) as handle:
        line_count = sum(1 for _ in handle)
    assert line_count == 40000
def test_sync_splitmapper_execution():
    """Split-mapper run with an output path writes the file to disk."""
    gem.loglevel("debug")
    reads = files.open(testfiles["reads_1.fastq"])
    out_path = results_dir + "/splitmap_out.mapping"
    mappings = gem.splitmapper(reads, index, out_path)
    assert mappings is not None
    assert mappings.process is not None
    assert mappings.filename == out_path
    assert os.path.exists(out_path)
    assert sum(1 for _ in mappings) == 10000
def test_merging_maps():
    """Merging a target with two sources fixes up known reads' map fields."""
    target = files.open(testfiles["test_merge_target.map"])
    source_1 = files.open(testfiles["test_merge_source_1.map"])
    source_2 = files.open(testfiles["test_merge_source_2.map"])
    merged = gem.merge(target, [source_1, source_2])
    total = 0
    for read in merged:
        total += 1
        # spot-check counts (field 3) and alignments (field 4) of known reads
        if read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:13866:124450":
            assert read.to_map().split("\t")[3] == "0+1", read.to_map()
            assert read.to_map().split("\t")[4] == "chr2:-:162359617:74G"
        elif read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:13753:124452":
            assert read.to_map().split("\t")[3] == "0+1", read.to_map()
            assert read.to_map().split("\t")[4] == "chr9:+:38397301:54G20"
        elif read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:14211:124259":
            assert read.to_map().split("\t")[3] == "1", read.to_map()
            assert read.to_map().split("\t")[4] == "chr15:+:72492866:75"
    assert total == 10
def test_gem2sam_execution():
    """GEM→SAM conversion streams all 10k reads without a file."""
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.mapper(reads, index)
    sam = gem.gem2sam(mappings, index, compact=True)
    assert sam is not None
    assert sam.process is not None
    # streamed conversion, nothing written to disk
    assert sam.filename is None
    assert sum(1 for _ in sam) == 10000
def test_merging_maps_to_file():
    """gem.merge() with an output path writes the merged file to disk."""
    # NOTE(review): a later test in this file reuses this function name and
    # shadows this definition — consider renaming so both actually run (F811).
    target = files.open(testfiles["test_merge_target.map"])
    source_1 = files.open(testfiles["test_merge_source_1.map"])
    source_2 = files.open(testfiles["test_merge_source_2.map"])
    result = results_dir + "/merged.mapping"
    merged = gem.merge(target, [source_1, source_2], output=result)
    total = 0
    for read in merged:
        total += 1
        # spot-check counts (field 3) and alignments (field 4) of known reads
        if read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:13866:124450 1:N:0:GCCAAT":
            assert read.to_map().split("\t")[3] == "0+1"
            assert read.to_map().split("\t")[4] == "chr2:-:162359617:74G"
        elif read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:13753:124452 1:N:0:GCCAAT":
            assert read.to_map().split("\t")[3] == "0+1"
            assert read.to_map().split("\t")[4] == "chr9:+:38397301:54G20"
        elif read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:14211:124259 1:N:0:GCCAAT":
            assert read.to_map().split("\t")[3] == "1"
            assert read.to_map().split("\t")[4] == "chr15:+:72492866:75"
    assert total == 10
    assert os.path.exists(result)
def test_merging_maps_to_file():
    """gem.merger().merge(path) writes the merged file to disk."""
    target = files.open(testfiles["test_merge_target.map"])
    source_1 = files.open(testfiles["test_merge_source_1.map"])
    source_2 = files.open(testfiles["test_merge_source_2.map"])
    result = results_dir + "/merged.mapping"
    merger = gem.merger(target, [source_1, source_2])
    total = 0
    for read in merger.merge(result):
        total += 1
        # spot-check counts (field 3) and alignments (field 4) of known reads
        if read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:13866:124450 1:N:0:GCCAAT":
            assert read.to_map().split("\t")[3] == "0+1"
            assert read.to_map().split("\t")[4] == "chr2:-:162359617:74G"
        elif read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:13753:124452 1:N:0:GCCAAT":
            assert read.to_map().split("\t")[3] == "0+1"
            assert read.to_map().split("\t")[4] == "chr9:+:38397301:54G20"
        elif read.tag == "HWI-ST661:153:D0FTJACXX:2:1102:14211:124259 1:N:0:GCCAAT":
            assert read.to_map().split("\t")[3] == "1"
            assert read.to_map().split("\t")[4] == "chr15:+:72492866:75"
    assert total == 10
    assert os.path.exists(result)
def test_merging_maps_chr21():
    """gem.merge() over five chr21 map files keeps 2000 reads / 3590 maps."""
    # NOTE(review): a later test in this file reuses this function name and
    # shadows this definition — consider renaming so both actually run (F811).
    names = [
        "chr21_mapping_initial.map",
        "chr21_mapping_denovo.map",
        "chr21_mapping_initial_split.map",
        "chr21_mapping_trim_20.map",
        "chr21_mapping_trim_20_split.map",
    ]
    readers = [files.open(testfiles[name]) for name in names]
    merged = gem.merge(readers[0], readers[1:])
    read_count = 0
    map_count = 0
    for read in merged:
        read_count += 1
        map_count += read.num_maps
    assert read_count == 2000, read_count
    assert map_count == 3590, map_count
def test_merging_maps_chr21():
    """gem.merger() over five chr21 map files keeps 2000 reads / 3590 maps."""
    names = [
        "chr21_mapping_initial.map",
        "chr21_mapping_denovo.map",
        "chr21_mapping_initial_split.map",
        "chr21_mapping_trim_20.map",
        "chr21_mapping_trim_20_split.map",
    ]
    readers = [files.open(testfiles[name]) for name in names]
    merger = gem.merger(readers[0], readers[1:])
    read_count = 0
    map_count = 0
    for read in merger:
        read_count += 1
        map_count += read.num_maps
    assert read_count == 2000, read_count
    assert map_count == 3590, map_count
def test_fastq_filter_unmapped():
    """The unmapped filter passes every read of a raw FASTQ file through."""
    reads = files.open(testfiles["reads_1.fastq"])
    total = sum(1 for _ in reads.clone())
    # renamed from `sum_length`: this is a read count, not a length sum
    unmapped_count = sum(1 for _ in filter.unmapped(reads.clone()))
    assert total == unmapped_count
    assert unmapped_count == 10000
def test_sam2bam_sort_async_execution():
    """Async sorted SAM→BAM conversion streams all 10k reads."""
    sam_reads = files.open(testfiles["reads_1.sam"])
    bam = gem.sam2bam(sam_reads, sorted=True)
    assert bam is not None
    assert sum(1 for _ in bam) == 10000
def test_reader_cloning():
    """A clone of an exhausted reader re-iterates the full input."""
    original = files.open(testfiles["reads_1.fastq"])
    assert sum(1 for _ in original) == 10000
    duplicate = original.clone()
    assert sum(1 for _ in duplicate) == 10000
def test_cat():
    """Concatenating two 10k-read FASTQ files yields 20k reads."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    total = sum(1 for _ in filter.cat([first, second]))
    assert total == 20000
def test_fastq_filtering():
    """10k reads of 75bp each: total length is 750k."""
    reads = files.open(testfiles["reads_1.fastq"])
    read_count = sum(1 for _ in reads.clone())
    total_length = sum(r.length for r in reads.clone())
    assert read_count == 10000, read_count
    assert total_length == 750000, total_length
def test_file_merge_async():
    """Async merge of a small map file with itself keeps all 10 reads."""
    left = files.open(testfiles["test.map"])
    right = files.open(testfiles["test.map"])
    merged = gem.merge(left, [right])
    assert sum(1 for _ in merged) == 10
def test_quality_pass_on_execution():
    """FASTQ quality offset is propagated onto the mapper output."""
    reads = files.open(testfiles["reads_1.fastq"])
    mappings = gem.mapper(
        reads, index, output=results_dir + "/quality_passon_mapping.map")
    assert mappings.quality == "offset-33", \
        "Quality should be 'offset-33' but is %s" % (str(mappings.quality))
def test_fastq_trim_both():
    """Trimming 10bp off each end of 10k 75bp reads leaves 550k bases."""
    reads = files.open(testfiles["reads_1.fastq"])
    total_length = sum(r.length for r in filter.trim(reads, 10, 10))
    assert total_length == 550000, total_length
def test_fastq_filter_interleave():
    """filter.interleave over two 10k-read files yields 20k reads."""
    first = files.open(testfiles["reads_1.fastq"])
    second = files.open(testfiles["reads_2.fastq"])
    total = sum(1 for _ in filter.interleave([first, second]))
    assert total == 20000