def run(self, args):
    """Merge the files listed in ``args.input`` via ``gem.merge``.

    The first input is passed to ``gem.merge`` as its first argument and
    the remaining inputs as the list of additional sources.

    Attributes read from ``args``:
        input: sequence of inputs; at least two are required.
        output: merge destination; ``sys.stdout`` is used when it is None.
        threads: converted with ``int()`` and forwarded as ``threads``.
        same: forwarded as ``same_content``.
        paired: forwarded as ``paired``.
        compress: forwarded as ``compress``.

    Returns:
        None. When fewer than two inputs are given, an error is logged and
        no merge is attempted.
    """
    if len(args.input) < 2:
        logging.error("You have to specify at least 2 files")
        # Stop here: continuing would index into an empty list or merge a
        # single file with nothing.
        return

    files = args.input
    output = sys.stdout if args.output is None else args.output

    gem.merge(
        files[0],
        files[1:],
        output,
        threads=int(args.threads),
        same_content=args.same,
        paired=args.paired,
        compress=args.compress,
    )
def test_file_merge_pairwise_same():
    """Merging test.map with itself (paired, 8 threads) yields 10 reads."""
    fixture = "test.map"
    target = files.open(testfiles[fixture])
    other = files.open(testfiles[fixture])
    merged = gem.merge(
        target,
        [other],
        output=results_dir + "/merge_result.map",
        threads=8,
        paired=True,
    )

    # Walk the merged stream and count what comes out.
    num_reads = 0
    for _ in merged:
        num_reads += 1
    assert num_reads == 10
def test_file_merge_pairwise_same_content_big_sub():
    """Merging 20t.map.gz with 20t_sub.map.gz (paired) yields 20000 reads."""
    target = files.open(testfiles["20t.map.gz"])
    subset = files.open(testfiles["20t_sub.map.gz"])
    destination = results_dir + "/merge_result.map"
    merged = gem.merge(target, [subset], output=destination, threads=8, paired=True)

    # Walk the merged stream and count what comes out.
    num_reads = 0
    for _ in merged:
        num_reads += 1
    assert num_reads == 20000
def test_file_merge_pairwise_same_content_big_uncompressed():
    """Merging the uncompressed 20t map with itself yields 20000 reads.

    The gzipped fixture is first expanded to ``results_dir + "/20t.map"``;
    that file is then opened twice and merged with ``same_content=True``.
    """
    import gzip
    import shutil

    uncompressed = results_dir + "/20t.map"
    # Decompress in-process rather than through a `cp ...; gunzip ...` shell
    # string: no quoting problems with unusual paths, a failure raises
    # instead of being silently ignored, and a leftover file from an earlier
    # run is simply overwritten.
    # NOTE(review): assumes testfiles[...] is a filesystem path -- confirm.
    with gzip.open(testfiles["20t.map.gz"], "rb") as packed:
        with open(uncompressed, "wb") as unpacked:
            shutil.copyfileobj(packed, unpacked)

    reads_1 = files.open(uncompressed)
    reads_2 = files.open(uncompressed)
    merged = gem.merge(
        reads_1,
        [reads_2],
        output=results_dir + "/merge_result.map",
        threads=8,
        same_content=True,
    )
    num_reads = sum(1 for r in merged)
    assert num_reads == 20000
def test_merging_maps_chr21():
    """Merging the five chr21 mappings gives 2000 reads and 3590 maps."""
    names = (
        "chr21_mapping_initial.map",
        "chr21_mapping_denovo.map",
        "chr21_mapping_initial_split.map",
        "chr21_mapping_trim_20.map",
        "chr21_mapping_trim_20_split.map",
    )
    # Open every fixture in listing order; the first one is the merge target.
    opened = [files.open(testfiles[name]) for name in names]
    merged = gem.merge(opened[0], opened[1:])

    count = 0
    ms = 0
    for read in merged:
        count = count + 1
        ms = ms + read.num_maps
    assert count == 2000, count
    assert ms == 3590, ms
def test_merging_maps_chr21():
    """Check read and map totals after merging the chr21 mapping files."""
    fixture_names = [
        "chr21_mapping_initial.map",
        "chr21_mapping_denovo.map",
        "chr21_mapping_initial_split.map",
        "chr21_mapping_trim_20.map",
        "chr21_mapping_trim_20_split.map",
    ]
    # The first opened file is the merge target, the rest are the sources.
    handles = [files.open(testfiles[fixture]) for fixture in fixture_names]
    target, sources = handles[0], handles[1:]
    merged = gem.merge(target, sources)

    count, ms = 0, 0
    for read in merged:
        count += 1
        ms += read.num_maps
    assert count == 2000, count
    assert ms == 3590, ms
def test_merging_maps():
    """Merge two sources into the target and spot-check three known reads.

    For each listed tag, columns 3 and 4 of the tab-separated map line must
    match the expected values, and the merge must produce 10 reads in total.
    """
    target = files.open(testfiles["test_merge_target.map"])
    source_1 = files.open(testfiles["test_merge_source_1.map"])
    source_2 = files.open(testfiles["test_merge_source_2.map"])
    merger = gem.merge(target, [source_1, source_2])

    # (tag, expected column 3, expected column 4)
    expectations = (
        ("HWI-ST661:153:D0FTJACXX:2:1102:13866:124450", "0+1", "chr2:-:162359617:74G"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:13753:124452", "0+1", "chr9:+:38397301:54G20"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:14211:124259", "1", "chr15:+:72492866:75"),
    )

    count = 0
    for read in merger:
        count += 1
        for tag, column_3, column_4 in expectations:
            if read.tag == tag:
                columns = read.to_map().split("\t")
                assert columns[3] == column_3, read.to_map()
                assert columns[4] == column_4
                break
    assert count == 10
def test_merging_maps_to_file():
    """Merge two sources into the target while writing to an output file.

    Spot-checks columns 3 and 4 of the map line for three known tags,
    expects 10 reads overall, and verifies the output file exists afterwards.
    """
    target = files.open(testfiles["test_merge_target.map"])
    source_1 = files.open(testfiles["test_merge_source_1.map"])
    source_2 = files.open(testfiles["test_merge_source_2.map"])
    result = results_dir + "/merged.mapping"
    merger = gem.merge(target, [source_1, source_2], output=result)

    # (tag, expected column 3, expected column 4)
    expectations = (
        ("HWI-ST661:153:D0FTJACXX:2:1102:13866:124450 1:N:0:GCCAAT", "0+1", "chr2:-:162359617:74G"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:13753:124452 1:N:0:GCCAAT", "0+1", "chr9:+:38397301:54G20"),
        ("HWI-ST661:153:D0FTJACXX:2:1102:14211:124259 1:N:0:GCCAAT", "1", "chr15:+:72492866:75"),
    )

    count = 0
    for read in merger:
        count += 1
        for tag, column_3, column_4 in expectations:
            if read.tag == tag:
                columns = read.to_map().split("\t")
                assert columns[3] == column_3
                assert columns[4] == column_4
                break
    assert count == 10
    assert os.path.exists(result)
def test_file_merge_async():
    """Merging test.map with itself using default options yields 10 reads."""
    fixture = "test.map"
    target = files.open(testfiles[fixture])
    other = files.open(testfiles[fixture])
    merged = gem.merge(target, [other])

    # Walk the merged stream and count what comes out.
    num_reads = 0
    for _ in merged:
        num_reads += 1
    assert num_reads == 10