def test_multigrmpy_pg_het_ins(self):
    """Run multigrmpy on the hg38 inputs and compare its JSON outputs.

    Both ``genotypes.json.gz`` and ``variants.json.gz`` written to a
    temporary output directory are loaded and compared, after canonical
    JSON serialisation, against the expected files referenced by
    ``self.hg38_expected_genotypes_json`` and
    ``self.hg38_expected_variants_json``.

    On a mismatch a context diff is written to stderr, the observed data
    is saved in the current working directory (``test_gt.json`` or
    ``test_variants.json``) and an ``Exception`` is raised.
    """
    import multigrmpy

    with tempfile.TemporaryDirectory() as output_dir:
        args = ADict({
            "input": self.hg38_input_vcf,
            "manifest": self.hg38_manifest,
            "reference": self.hg38_reference,
            "output": output_dir,
            "grmpy": os.path.join(os.path.dirname(__file__), "module-wrapper.sh") + " " +
                     os.path.join(GRMPY_INSTALL, "bin", "grmpy"),
            "verbose": False,
            "quiet": True,
            "logfile": None,
            "write_alignments": False,
            "infer_read_haplotypes": False,
            "graph_sequence_matching": True,
            "klib_sequence_matching": False,
            "kmer_sequence_matching": False,
            "bad_align_uniq_kmer_len": 0,
            "threads": 1,
            "sample_threads": 1,
            "genotyping_parameters": None,
            "max_reads_per_event": 10000,
            "split_type": "lines",
            "read_length": 150,
            "max_ref_node_length": 1000,
            "graph_type": "alleles",
            "alt_splitting": True,
            "retrieve_reference_sequence": False,
            "scratch_dir": None,
            "keep_scratch": True,
        })
        output_json_path = os.path.join(output_dir, "genotypes.json.gz")
        multigrmpy.run(args)

        # compare genotyping results
        with gzip.open(output_json_path, 'rt') as result_json:
            observed = json.load(result_json)
        with open(self.hg38_expected_genotypes_json, 'rt') as expected_json:
            expected = json.load(expected_json)

        # check if genotypes are the same; both sides are serialised with
        # sorted keys so the comparison does not depend on key order
        observed_lines = json.dumps(observed, sort_keys=True, indent=4, separators=(',', ': '))
        expected_lines = json.dumps(expected, sort_keys=True, indent=4, separators=(',', ': '))
        if observed_lines != expected_lines:
            for line in difflib.context_diff(
                    expected_lines.split("\n"),
                    observed_lines.split("\n"),
                    fromfile=self.hg38_expected_genotypes_json,
                    tofile=output_json_path):
                sys.stderr.write(line + "\n")
            # keep the observed output so it can be promoted to the new expectation
            with open("test_gt.json", "wt") as out_file:
                json.dump(observed, out_file, sort_keys=True, indent=4, separators=(',', ': '))
            raise Exception(
                "Genotyping results don't match! If this is expected and new behavior, "
                "cp test_gt.json %s" % self.hg38_expected_genotypes_json)

        # check if variants are the same
        output_json_path = os.path.join(output_dir, "variants.json.gz")
        with gzip.open(output_json_path, 'rt') as result_json:
            observed = json.load(result_json)
        with open(self.hg38_expected_variants_json, 'rt') as expected_json:
            expected = json.load(expected_json)

        # contains temp file locations
        del observed[0]["graph"]["model_name"]
        if "model_name" in expected[0]["graph"]:
            del expected[0]["graph"]["model_name"]

        observed_lines = json.dumps(observed, sort_keys=True, indent=4, separators=(',', ': '))
        expected_lines = json.dumps(expected, sort_keys=True, indent=4, separators=(',', ': '))
        if observed_lines != expected_lines:
            # label the diff with the variants expectation file (not the
            # genotypes one) so the report points at the file actually compared
            for line in difflib.context_diff(
                    expected_lines.split("\n"),
                    observed_lines.split("\n"),
                    fromfile=self.hg38_expected_variants_json,
                    tofile=output_json_path):
                sys.stderr.write(line + "\n")
            with open("test_variants.json", "wt") as out_file:
                json.dump(observed, out_file, sort_keys=True, indent=4, separators=(',', ': '))
            raise Exception(
                "Converted variants don't match! If this is expected and new behavior, "
                "cp test_variants.json %s" % self.hg38_expected_variants_json)
def test_multigrmpy(self):
    """Run multigrmpy on the basic inputs and check selected genotype calls.

    For the event with ID ``test-ins`` the GT of ``sample1`` must be
    ``S1/S1`` and that of ``sample2`` must be ``./.``; for the event with
    ID ``test-del`` the two are swapped. Events with any other ID are
    not checked.
    """
    import multigrmpy

    with tempfile.TemporaryDirectory() as output_dir:
        wrapper_script = os.path.join(os.path.dirname(__file__), "module-wrapper.sh")
        grmpy_binary = os.path.join(GRMPY_INSTALL, "bin", "grmpy")
        args = ADict({
            "input": self.input_vcf,
            "manifest": self.manifest,
            "reference": self.reference,
            "output": output_dir,
            "grmpy": wrapper_script + " " + grmpy_binary,
            "verbose": False,
            "quiet": True,
            "logfile": None,
            "infer_read_haplotypes": False,
            "write_alignments": False,
            "graph_sequence_matching": True,
            "klib_sequence_matching": False,
            "kmer_sequence_matching": False,
            "bad_align_uniq_kmer_len": 0,
            "threads": 1,
            "sample_threads": 1,
            "genotyping_parameters": None,
            "max_reads_per_event": 10000,
            "split_type": "lines",
            "read_length": 150,
            "max_ref_node_length": 1000,
            "graph_type": "alleles",
            "alt_splitting": True,
            "retrieve_reference_sequence": False,
            "scratch_dir": None,
            "keep_scratch": True,
        })
        multigrmpy.run(args)

        genotypes_path = os.path.join(output_dir, "genotypes.json.gz")
        with gzip.open(genotypes_path, 'rt') as genotypes_file:
            genotyped_events = json.load(genotypes_file)

        for event in genotyped_events:
            event_id = event["graphinfo"]["ID"]
            if event_id == "test-ins":
                want_sample1, want_sample2 = "S1/S1", "./."
            elif event_id == "test-del":
                want_sample1, want_sample2 = "./.", "S1/S1"
            else:
                # only the two named events carry expectations
                continue
            self.assertEqual(
                event["samples"]["sample1"]["gt"]["GT"], want_sample1)
            self.assertEqual(
                event["samples"]["sample2"]["gt"]["GT"], want_sample2)
def test_multigrmpy_expected_genotypes(self):
    """Run multigrmpy on the swaps inputs and compare all three outputs.

    Checks, in order:

    1. ``genotypes.json.gz``: after sorting events by ``graphinfo.ID``,
       the ``SWAPS`` sample GT of every event must equal the expected one
       and the number of events must be the same.
    2. ``genotypes.vcf.gz``: the non-header (non ``#``) lines must equal
       the lines of the expected VCF file.
    3. ``variants.json.gz``: after sorting by ``ID`` and removing
       ``graph.model_name``, the canonical JSON must equal the expected
       JSON.

    On a mismatch a context diff is written to stderr, the observed data
    is saved in the current working directory and an ``Exception`` is
    raised.
    """
    import multigrmpy

    with tempfile.TemporaryDirectory() as output_dir:
        args = ADict({
            "input": self.swaps_input_vcf,
            "manifest": self.swaps_manifest,
            "reference": self.swaps_reference,
            "output": output_dir,
            "grmpy": os.path.join(os.path.dirname(__file__), "module-wrapper.sh") + " " +
                     os.path.join(GRMPY_INSTALL, "bin", "grmpy"),
            "verbose": False,
            "quiet": True,
            "logfile": None,
            "infer_read_haplotypes": False,
            "write_alignments": False,
            "graph_sequence_matching": True,
            "klib_sequence_matching": False,
            "kmer_sequence_matching": False,
            "bad_align_uniq_kmer_len": 0,
            "threads": 1,
            "sample_threads": 1,
            "genotyping_parameters": None,
            "max_reads_per_event": 10000,
            "split_type": "lines",
            "read_length": 150,
            "max_ref_node_length": 1000,
            "graph_type": "alleles",
            "alt_splitting": True,
            "retrieve_reference_sequence": False,
            "scratch_dir": None,
            "keep_scratch": False,
        })
        multigrmpy.run(args)

        # --- genotypes JSON -------------------------------------------
        output_json_path = os.path.join(output_dir, "genotypes.json.gz")
        with gzip.open(output_json_path, 'rt') as result_json:
            observed = json.load(result_json)
        with open(self.swaps_expected_genotypes_json, 'rt') as expected_json:
            expected = json.load(expected_json)

        # event ordering is not guaranteed
        observed = sorted(observed, key=lambda x: x["graphinfo"]["ID"])
        expected = sorted(expected, key=lambda x: x["graphinfo"]["ID"])

        # A differing number of events is a mismatch in its own right:
        # without the length check, extra expected events would go
        # unnoticed and extra observed events would raise IndexError
        # instead of producing the diff below.
        match = len(observed) == len(expected) and all(
            o["samples"]["SWAPS"]["gt"]["GT"] == e["samples"]["SWAPS"]["gt"]["GT"]
            for o, e in zip(observed, expected))

        if not match:
            observed_lines = json.dumps(observed, sort_keys=True, indent=4, separators=(',', ': '))
            expected_lines = json.dumps(expected, sort_keys=True, indent=4, separators=(',', ': '))
            for line in difflib.context_diff(
                    expected_lines.split("\n"),
                    observed_lines.split("\n"),
                    fromfile=self.swaps_expected_genotypes_json,
                    tofile=output_json_path):
                sys.stderr.write(line + "\n")
            # keep the observed output so it can be promoted to the new expectation
            with open("test_swaps_genotypes.json", "wt") as out_file:
                json.dump(observed, out_file, sort_keys=True, indent=4, separators=(',', ': '))
            raise Exception(
                "Swaps test genotyping output doesn't match! If this is expected and new behavior, "
                "cp test_swaps_genotypes.json %s" % self.swaps_expected_genotypes_json)

        # --- genotypes VCF --------------------------------------------
        output_vcf_path = os.path.join(output_dir, "genotypes.vcf.gz")
        with gzip.open(output_vcf_path, 'rt') as result_vcf:
            observed_lines = result_vcf.read().splitlines(keepends=False)
        # header lines are dropped from the observed side only; the
        # expected file is compared as-is
        observed_lines = [x for x in observed_lines if not x.startswith("#")]
        with open(self.swaps_expected_genotypes_vcf, 'rt') as expected_vcf:
            expected_lines = expected_vcf.read().splitlines(keepends=False)

        if observed_lines != expected_lines:
            for line in difflib.context_diff(
                    expected_lines,
                    observed_lines,
                    fromfile=self.swaps_expected_genotypes_vcf,
                    tofile=output_vcf_path):
                sys.stderr.write(line + "\n")
            with open("test_swaps_genotypes.vcf", "wt") as out_file:
                for x in observed_lines:
                    print(x, file=out_file)
            raise Exception(
                "Swaps VCF output doesn't match! If this is expected and new behavior, "
                "cp test_swaps_genotypes.vcf %s" % self.swaps_expected_genotypes_vcf)

        # --- variants JSON --------------------------------------------
        output_json_path = os.path.join(output_dir, "variants.json.gz")
        with gzip.open(output_json_path, 'rt') as result_json:
            observed = json.load(result_json)
        with open(self.swaps_expected_variants_json, 'rt') as expected_json:
            expected = json.load(expected_json)

        # event ordering is not guaranteed
        observed = sorted(observed, key=lambda x: x["ID"])
        expected = sorted(expected, key=lambda x: x["ID"])

        # remove temp file information
        for o in observed:
            del o["graph"]["model_name"]
        for e in expected:
            if "model_name" in e["graph"]:
                del e["graph"]["model_name"]

        observed_lines = json.dumps(observed, sort_keys=True, indent=4, separators=(',', ': '))
        expected_lines = json.dumps(expected, sort_keys=True, indent=4, separators=(',', ': '))
        if observed_lines != expected_lines:
            for line in difflib.context_diff(
                    expected_lines.split("\n"),
                    observed_lines.split("\n"),
                    fromfile=self.swaps_expected_variants_json,
                    tofile=output_json_path):
                sys.stderr.write(line + "\n")
            with open("test_swaps_variants.json", "wt") as out_file:
                json.dump(observed, out_file, sort_keys=True, indent=4, separators=(',', ': '))
            raise Exception(
                "Swaps test converted variants don't match! If this is expected and new behavior, "
                "cp test_swaps_variants.json %s" % self.swaps_expected_variants_json)