def test_main_prints_help_to_stdout(capsys):
    """Invoking the CLI without arguments must exit non-zero and print help to stdout only."""
    with pytest.raises(SystemExit) as exc_info:
        main([])
    assert exc_info.value.code != 0
    captured = capsys.readouterr()
    assert captured.out
    assert not captured.err
def test_run_snappy_itransfer_raw_data_smoke_test(mocker):
    """Smoke test for ``snappy itransfer-raw-data`` with a TSV sheet and explicit destination.

    Creates fake FASTQ files (plus ``.md5`` companions) in a fake file system, runs the
    command with ``check_output`` mocked, and verifies one ``imkdir``/``irsync``/``ils``
    invocation per file.
    """
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    tsv_path = os.path.join(os.path.dirname(__file__), "data", "germline.out")
    argv = [
        "snappy",
        "itransfer-raw-data",
        "--num-parallel-transfers",
        "1",
        "--base-path",
        fake_base_path,
        tsv_path,
        dest_path,
    ]
    # Setup fake file system but only patch selected modules. We cannot use the Patcher approach here as this would
    # break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()
    fake_file_paths = []
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/work/input_links/%s-N1-DNA1-WES1/%s-N1-DNA1-WES1.fastq.gz%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_raw_data.os", fake_os)
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    # Actually exercise code and perform test.
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)
    assert not res
    # We do not care about call order but simply test call count and then assert that all files are there which would
    # be equivalent of comparing sets of files.
    # Three external commands per file: imkdir, irsync, ils.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        # The first path component below input_links is the donor/index name.
        index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path, "ngs_mapping/work/input_links")).split("/", 1)
        remote_path = os.path.join(dest_path, index, "raw_data", args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            "i:%s" % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        # stderr=-2 is the numeric value of subprocess.STDOUT.
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
def test_add_ped_just_update(tmpdir):
    """Test updating study and assay."""
    isa_tab_dir = pathlib.Path(__file__).parent / "data" / "isa_tab"
    scratch_dir = tmpdir / "scratch"
    path_ped = isa_tab_dir / "in_just_update" / "input.ped"
    # Work on a copy so the checked-in fixture files stay untouched.
    shutil.copytree(str(isa_tab_dir / "in_just_update"), str(scratch_dir))
    argv = BASE_ARGS + [str(scratch_dir / "i_Investigation.txt"), str(path_ped)]
    # Actually exercise code and perform test.
    assert not main(argv)
    compare_input_output(str(scratch_dir), str(isa_tab_dir / "expected_output"))
def test_run_snappy_itransfer_ngs_mapping_nostep(capsys):
    """``snappy itransfer-step`` is expected to fail (exit code 1) in this invocation."""
    uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    command_line = [
        "snappy",
        "itransfer-step",
        "--sodar-api-token",
        "XXXX",
        uuid,
        "--tool",
        "bwa",
    ]
    setup_argparse()
    assert main(command_line) == 1
def test_run_isatab_annotate_case2_specific(tmp_path):
    """Annotate ISA-tab with explicit study/assay selection and forced overwrite."""
    data_dir = os.path.join(os.path.dirname(__file__), "data", "isa_tab")
    # Work on a copy of the input ISA-tab files so the fixtures stay pristine.
    work_dir = copytree(os.path.join(data_dir, "annotate_input"),
                        os.path.join(tmp_path, "isa_tab_annotate"))
    annotation_csv = os.path.join(data_dir, "annotate_input", "isa_tab_annotation.csv")
    # Run the annotation command with forced update and explicit targets.
    argv = [
        "isa-tab",
        "annotate",
        "--force-update",
        "--yes",
        os.path.join(work_dir, "i_Investigation.txt"),
        annotation_csv,
        "--target-study",
        "s_isatest.txt",
        "--target-assay",
        "a_isatest_selecting_assay_2.txt",
    ]
    assert not main(argv)
    # Compare every produced file against the reference output.
    expected_dir = os.path.join(os.path.dirname(__file__), "data", "isa_tab", "annotate_result2")
    names = (os.path.basename(f) for f in glob.glob(os.path.join(expected_dir, "*")))
    match, mismatch, errors = filecmp.cmpfiles(expected_dir, work_dir, names, shallow=False)
    print([match, mismatch, errors])
    assert not mismatch
    assert not errors
def test_run_seasnap_pull_isa_smoke_test(requests_mock, capsys, fs):
    """Smoke test for ``sea-snap pull-isa``: fetch ISA-tab JSON from a mocked SODAR API
    and compare the written ``ISA_files`` against the reference directory."""
    # --- setup arguments
    project_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = ["sea-snap", "pull-isa", "--sodar-api-token", "XXX", project_uuid]
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    # --- add test content
    path_json = os.path.join(os.path.dirname(__file__), "data", "isa_test.json")
    fs.add_real_file(path_json)
    with open(path_json, "rt") as inputf:
        json_text = inputf.read()
    # --- mock modules
    url = URL_TPL % {
        "sodar_url": args.sodar_url,
        "project_uuid": project_uuid,
        "api_key": "XXX"
    }
    requests_mock.get(url, text=json_text)
    # --- run tests
    res = main(argv)
    assert not res
    test_dir = os.path.join(os.path.dirname(__file__), "data", "ISA_files_test")
    fs.add_real_directory(test_dir)
    files = glob.glob(os.path.join(test_dir, "*"))
    # "ISA_files" is created by the command in the (fake) current working directory.
    match, mismatch, errors = filecmp.cmpfiles("ISA_files", test_dir,
                                               (os.path.basename(f) for f in files),
                                               shallow=False)
    print([match, mismatch, errors])
    assert len(mismatch) == 0
    assert len(errors) == 0
    res = capsys.readouterr()
    assert not res.err
def test_run_seasnap_working_dir_smoke_test(capsys, fs):
    """Smoke test for ``sea-snap working-dir``: the command must create a dated working
    directory, copy the config files into it, and symlink the ``sea-snap`` entry point."""
    # --- setup arguments
    seasnap_dir = "fake_seasnap"
    seasnap_files = [
        "mapping_config.yaml",
        "DE_config.yaml",
        "cluster_config.json",
        "mapping_pipeline.snake",
        "sea-snap.py",
    ]
    argv = ["sea-snap", "working-dir", seasnap_dir]
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    # --- add test files
    fs.create_dir(seasnap_dir)
    for f in seasnap_files:
        fs.create_file(os.path.join(seasnap_dir, f))
    # --- run tests
    res = main(argv)
    assert not res
    # test dir created; args.dirname is a strftime pattern filled with the current time
    wd = time.strftime(args.dirname)
    assert Path(wd).is_dir()
    # test files copied -- only the first three (config) files are expected to be copied
    seasnap_files = seasnap_files[:3]
    for f in seasnap_files:
        p = os.path.join(wd, f)
        assert Path(p).is_file()
    # test symlink created
    p = os.path.join(wd, "sea-snap")
    assert Path(p).is_symlink()
    res = capsys.readouterr()
    assert not res.err
def test_run_isatab_annotate_case1_default(tmp_path):
    """Annotate ISA-tab with default settings (first study, first assay, no forced overwrite)."""
    data_dir = os.path.join(os.path.dirname(__file__), "data", "isa_tab")
    # Work on a copy of the input ISA-tab files so the fixtures stay pristine.
    work_dir = copytree(os.path.join(data_dir, "annotate_input"),
                        os.path.join(tmp_path, "isa_tab_annotate"))
    annotation_csv = os.path.join(data_dir, "annotate_input", "isa_tab_annotation.csv")
    # Run the annotation command.
    argv = [
        "isa-tab",
        "annotate",
        "--yes",
        os.path.join(work_dir, "i_Investigation.txt"),
        annotation_csv,
    ]
    assert not main(argv)
    # Compare every produced file against the reference output.
    expected_dir = os.path.join(os.path.dirname(__file__), "data", "isa_tab", "annotate_result1")
    names = (os.path.basename(f) for f in glob.glob(os.path.join(expected_dir, "*")))
    match, mismatch, errors = filecmp.cmpfiles(expected_dir, work_dir, names, shallow=False)
    print([match, mismatch, errors])
    assert not mismatch
    assert not errors
def test_run_archive_summary_smoke_test():
    """Smoke test for ``archive summary``: generate a summary table and compare its
    rows against the checked-in reference table."""
    filename = "summary.tbl"
    with tempfile.TemporaryDirectory() as tmp_dir:
        repo_dir = os.path.join(os.path.dirname(__file__), "data", "archive")
        target_file = os.path.join(repo_dir, filename)
        mocked_file = os.path.join(tmp_dir, filename)
        argv = [
            "archive",
            "summary",
            "--class",
            os.path.join(repo_dir, "classes.yaml"),
            os.path.join(repo_dir, "project"),
            mocked_file,
        ]
        setup_argparse()
        # --- run tests
        res = main(argv)
        assert not res
        # Parse both tables as tab-separated rows, dropping the first line.
        mocked = [
            line.rstrip().split("\t") for line in open(mocked_file, "rt")
        ][1:]
        target = [
            line.rstrip().split("\t") for line in open(target_file, "rt")
        ][1:]
        assert len(mocked) == len(target)
        # NOTE(review): target[0] is the first row AFTER the [1:] slice; the "ResolvedName"
        # lookup and the v[-j] negative indexing below look suspicious -- verify that the
        # intended column is really addressed this way.
        j = target[0].index("ResolvedName")
        failed = []
        # Every reference row must have a matching row (by the chosen column) in the output.
        for value in target:
            found = False
            for v in mocked:
                if v[-j] == value[-j]:
                    found = True
                    break
            if not found:
                failed.append(value)
        assert len(failed) == 0
def test_run_archive_readme_smoke_test():
    """Smoke test for ``archive readme``: generate a README from template variables
    and check that the result passes the validity check."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        project_name = "project"
        project_dir = os.path.join(os.path.dirname(__file__), "data", "archive",
                                   project_name)
        readme_path = os.path.join(tmp_dir, project_name, "README.md")
        argv = [
            "--sodar-server-url",
            # Fixed: was "https://sodar.bihealth.,org" (stray comma made the URL malformed).
            "https://sodar.bihealth.org",
            "archive",
            "readme",
            "--var-PI-name",
            "Maxene Musterfrau",
            "--var-archiver-name",
            "Eric Blanc",
            "--var-client-name",
            "Max Mustermann",
            "--var-SODAR-UUID",
            "00000000-0000-0000-0000-000000000000",
            "--var-Gitlab-URL",
            "https://cubi-gitlab.bihealth.org",
            "--var-start-date",
            "1970-01-01",
            "--no-input",
            project_dir,
            readme_path,
        ]
        setup_argparse()
        # --- run tests
        res = main(argv)
        assert not res
        assert cubi_tk.archive.readme.is_readme_valid(readme_path)
def test_run_seasnap_write_sample_info_smoke_test(capsys, requests_mock, fs):
    """End-to-end test for ``sea-snap write-sample-info``: pull ISA-tab from a mocked
    SODAR API, scan fake FASTQ files, and compare the YAML written to stdout with the
    checked-in reference."""
    # --- setup arguments
    project_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    in_path_pattern = os.path.join(os.path.dirname(__file__), "data", "fastq_test",
                                   "{sample}_{mate,R1|R2}")
    argv = [
        "sea-snap",
        "write-sample-info",
        "--sodar-auth-token",
        "XXX",
        "--project_uuid",
        project_uuid,
        in_path_pattern,
        "-",
    ]
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    # --- add test content and files
    path_json = os.path.join(os.path.dirname(__file__), "data", "isa_test.json")
    fs.add_real_file(path_json)
    with open(path_json, "rt") as inputf:
        json_text = inputf.read()
    path_fastq_test = os.path.join(os.path.dirname(__file__), "data", "fastq_test")
    fs.add_real_directory(path_fastq_test)
    target_file = os.path.join(os.path.dirname(__file__), "data", "sample_info_test.yaml")
    fs.add_real_file(target_file)
    # --- mock modules
    url = URL_TPL % {
        "sodar_url": args.sodar_url,
        "project_uuid": project_uuid,
        "api_key": "XXX"
    }
    requests_mock.get(url, text=json_text)
    # --- run as end-to-end test
    res = main(argv)
    assert not res
    # test content of generated file ("-" sends the sample info to stdout)
    with open(target_file, "r") as f:
        expected_result = f.read()
    res = capsys.readouterr()
    assert not res.err
    assert expected_result == res.out
    # test whether ISA files were pulled correctly
    test_dir = os.path.join(os.path.dirname(__file__), "data", "ISA_files_test")
    fs.add_real_directory(test_dir)
    files = glob.glob(os.path.join(test_dir, "*"))
    match, mismatch, errors = filecmp.cmpfiles("ISA_files", test_dir,
                                               (os.path.basename(f) for f in files),
                                               shallow=False)
    print([match, mismatch, errors])
    assert len(mismatch) == 0
    assert len(errors) == 0
def test_run_sodar_ingest_fastq_smoke_test(mocker, fs):
    """Smoke test for ``sodar ingest-fastq`` (direct iRODS destination, no API lookup):
    fake FASTQ files are created, the missing MD5 is regenerated, and the expected
    iRODS commands are asserted on the mocked ``check_output``."""
    # --- setup arguments
    irods_path = "/irods/dest"
    dest_path = "target/folder/generic_file.fq.gz"
    fake_base_path = "/base/path"
    argv = [
        "--verbose",
        "sodar",
        "ingest-fastq",
        "--yes",
        "--remote-dir-pattern",
        dest_path,
        fake_base_path,
        irods_path,
    ]
    parser, subparsers = setup_argparse()
    # --- add test files
    fake_file_paths = []
    for member in ("sample1", "sample2", "sample3"):
        for ext in ("", ".md5"):
            fake_file_paths.append("%s/%s/%s-N1-RNA1-RNA_seq1.fastq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append("%s/%s/%s-N1-DNA1-WES1.fq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
    # Remove index's log MD5 file again so it is recreated.
    # fake_file_paths[3] is sample1's .fq.gz.md5 (matches the md5sum assertion below).
    fs.remove(fake_file_paths[3])
    # --- mock modules
    mock_check_output = mock.mock_open()
    # mocker.patch("cubi_tk.sodar.ingest_fastq.check_output", mock_check_output)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)
    # necessary because independent test fail
    mock_value = mock.mock_open()
    mocker.patch("cubi_tk.sodar.ingest_fastq.Value", mock_value)
    mocker.patch("cubi_tk.snappy.itransfer_common.Value", mock_value)
    # --- run tests
    res = main(argv)
    assert not res
    # The removed MD5 file must have been recreated.
    assert fs.exists(fake_file_paths[3])
    assert mock_check_call.call_count == 1
    assert mock_check_call.call_args[0] == ([
        "md5sum", "sample1-N1-DNA1-WES1.fq.gz"
    ], )
    # NOTE(review): the factor 3 is one imkdir/irsync/ils triple per file; the extra
    # factor 5 is not evident from this test -- confirm against the implementation.
    assert mock_check_output.call_count == len(fake_file_paths) * 3 * 5
    remote_path = os.path.join(irods_path, dest_path)
    for path in fake_file_paths:
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        ext = ".md5" if path.split(".")[-1] == "md5" else ""
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            ("i:%s" + ext) % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        assert ((expected_mkdir_argv, ), ) in mock_check_output.call_args_list
        assert ((expected_irsync_argv, ), ) in mock_check_output.call_args_list
        # stderr=-2 is the numeric value of subprocess.STDOUT.
        assert ((expected_ils_argv, ), {
            "stderr": -2
        }) in mock_check_output.call_args_list
def test_run_snappy_itransfer_raw_data_smoke_test(mocker, minimal_config, germline_trio_sheet_tsv):
    """Smoke test for ``snappy itransfer-raw-data`` driven by a SODAR UUID: the sample
    sheet and config live in a fake ``.snappy_pipeline`` directory and SODAR lookup is
    mocked via ``my_get_sodar_info``."""
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    sodar_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = [
        "snappy",
        "itransfer-raw-data",
        "--num-parallel-transfers",
        "1",
        "--base-path",
        fake_base_path,
        "--sodar-api-token",
        "XXXX",
        sodar_uuid,
    ]
    # Setup fake file system but only patch selected modules. We cannot use the Patcher approach here as this would
    # break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()
    fake_file_paths = []
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/work/input_links/%s-N1-DNA1-WES1/%s-N1-DNA1-WES1.fastq.gz%s"
                % (fake_base_path, member, member, ext)
            )
            fs.create_file(fake_file_paths[-1])
    # Create sample sheet in fake file system
    sample_sheet_path = fake_base_path + "/.snappy_pipeline/sheet.tsv"
    fs.create_file(sample_sheet_path, contents=germline_trio_sheet_tsv, create_missing_dirs=True)
    # Create config in fake file system
    config_path = fake_base_path + "/.snappy_pipeline/config.yaml"
    fs.create_file(config_path, contents=minimal_config, create_missing_dirs=True)
    # Set Mocker
    mocker.patch("pathlib.Path.exists", my_exists)
    mocker.patch(
        "cubi_tk.snappy.itransfer_common.SnappyItransferCommandBase.get_sodar_info",
        my_get_sodar_info,
    )
    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_raw_data.os", fake_os)
    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.common.open", fake_open)
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    # Actually exercise code and perform test.
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)
    assert not res
    # We do not care about call order but simply test call count and then assert that all files are there which would
    # be equivalent of comparing sets of files.
    # Three external commands per file: imkdir, irsync, ils.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        # The first path component below input_links is the donor/index name.
        index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path, "ngs_mapping/work/input_links")
        ).split("/", 1)
        remote_path = os.path.join(dest_path, index, "raw_data", args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = ["irsync", "-a", "-K", path, "i:%s" % remote_path]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        # stderr=-2 is the numeric value of subprocess.STDOUT.
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
def test_run_snappy_itransfer_ngs_mapping_smoke_test(mocker):
    """Smoke test for ``snappy itransfer-ngs-mapping`` with a TSV sheet: BAM/log files
    are faked, one MD5 is removed so the command must regenerate it, and the iRODS
    command invocations are asserted on the mocked ``check_output``."""
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    tsv_path = os.path.join(os.path.dirname(__file__), "data", "germline.out")
    argv = [
        "--verbose",
        "snappy",
        "itransfer-ngs-mapping",
        "--base-path",
        fake_base_path,
        tsv_path,
        dest_path,
    ]
    # Setup fake file system but only patch selected modules. We cannot use the Patcher approach here as this would
    # break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()
    fake_file_paths = []
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/out/%s-N1-DNA1-WES1.bam%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/log/bwa.%s-N1-DNA1-WES1.log%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
    # Remove index's log MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])
    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_ngs_mapping.os", fake_os)
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)
    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)
    # Actually exercise code and perform test.
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)
    assert not res
    # We do not care about call order but simply test call count and then assert that all files are there which would
    # be equivalent of comparing sets of files.
    # The removed MD5 file must have been recreated via a single md5sum call.
    assert fs.exists(fake_file_paths[3])
    assert mock_check_call.call_count == 1
    mock_check_call.assert_called_once_with(
        ["md5sum", "bwa.index-N1-DNA1-WES1.log"],
        cwd=os.path.dirname(fake_file_paths[3]),
        stdout=ANY,
    )
    # Three external commands per file: imkdir, irsync, ils.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        # Directory name is "<mapper>.<index>"; split off the index for the remote layout.
        mapper_index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path, "ngs_mapping/output")).split("/", 1)
        _mapper, index = mapper_index.rsplit(".", 1)
        remote_path = os.path.join(dest_path, index, "ngs_mapping", args.remote_dir_date,
                                   rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            "i:%s" % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        # stderr=-2 is the numeric value of subprocess.STDOUT.
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
def test_run_archive_prepare_smoke_test():
    """Smoke test for ``archive prepare``: build the temporary archive destination and
    compare the hashdeep report and the produced file tree against the reference."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        repo_dir = os.path.join(os.path.dirname(__file__), "data", "archive")
        project_name = "project"
        argv = [
            "archive",
            "prepare",
            "--rules",
            os.path.join(repo_dir, "rules.yaml"),
            "--no-readme",
            os.path.join(repo_dir, project_name),
            os.path.join(tmp_dir, "temp_dest"),
        ]
        setup_argparse()
        # --- run tests
        res = main(argv)
        assert not res
        # --- remove hashdeep report filename timestamp (normalize to the reference name)
        os.rename(
            os.path.join(
                tmp_dir, "temp_dest",
                datetime.date.today().strftime("%Y-%m-%d_hashdeep_report.txt")),
            os.path.join(tmp_dir, "temp_dest", "1970-01-01_hashdeep_report.txt"),
        )
        # --- compare hashdeep report with reference
        (repo_titles, repo_body) = sort_hashdeep_title_and_body(
            os.path.join(repo_dir, "temp_dest_verif", "1970-01-01_hashdeep_report.txt"))
        (tmp_titles, tmp_body) = sort_hashdeep_title_and_body(
            os.path.join(tmp_dir, "temp_dest", "1970-01-01_hashdeep_report.txt"))
        # No test on gzipped files, timestamp stored on gzip format could be different
        assert repo_body == tmp_body
        # Collect relative paths of all files/symlinks in the reference tree ...
        prefix = os.path.join(repo_dir, "temp_dest_verif")
        ref_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        # ... and in the freshly produced tree; both sets must be identical.
        prefix = os.path.join(tmp_dir, "temp_dest")
        test_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        assert sorted(ref_fns) == sorted(test_fns)
        matches, mismatches, errors = filecmp.cmpfiles(
            os.path.join(repo_dir, "temp_dest_verif"),
            os.path.join(tmp_dir, "temp_dest"),
            common=ref_fns,
            shallow=False,
        )
        assert len(matches) > 0
        # Ignored entries are expected to show up as comparison errors.
        assert sorted(errors) == [
            "extra_data/to_ignored_dir", "extra_data/to_ignored_file"
        ]
        assert sorted(mismatches) == [
            "1970-01-01_hashdeep_report.txt", "pipeline/output/sample2"
        ]
def test_run_snappy_itransfer_variant_calling_smoke_test(
        mocker, minimal_config, germline_trio_sheet_tsv):
    """Smoke test for ``snappy itransfer-variant-calling`` driven by a SODAR UUID:
    VCF/TBI/log files are faked, one MD5 is removed so the command must regenerate it,
    and the iRODS command invocations are asserted on the mocked ``check_output``."""
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    sodar_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = [
        "--verbose",
        "snappy",
        "itransfer-variant-calling",
        "--base-path",
        fake_base_path,
        "--sodar-api-token",
        "XXXX",
        # tsv_path,
        sodar_uuid,
    ]
    # Setup fake file system but only patch selected modules. We cannot use the Patcher approach here as this would
    # break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()
    fake_file_paths = []
    for member in ("index", ):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/variant_calling/output/bwa.gatk_hc.%s-N1-DNA1-WES1/out/bwa.gatk_hc.%s-N1-DNA1-WES1.vcf.gz%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/variant_calling/output/bwa.gatk_hc.%s-N1-DNA1-WES1/out/bwa.gatk_hc.%s-N1-DNA1-WES1.vcf.gz.tbi%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/variant_calling/output/bwa.gatk_hc.%s-N1-DNA1-WES1/log/bwa.gatk_hc.%s-N1-DNA1-WES1.log%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
    # Create sample sheet in fake file system
    sample_sheet_path = fake_base_path + "/.snappy_pipeline/sheet.tsv"
    fs.create_file(sample_sheet_path, contents=germline_trio_sheet_tsv, create_missing_dirs=True)
    # Create config in fake file system
    config_path = fake_base_path + "/.snappy_pipeline/config.yaml"
    fs.create_file(config_path, contents=minimal_config, create_missing_dirs=True)
    # Print path to all created files
    print("\n".join(fake_file_paths + [sample_sheet_path, config_path]))
    # Remove the index's MD5 file again so it is recreated.  Note: with this creation
    # order fake_file_paths[3] is the .vcf.gz.md5 (not a log MD5), matching the
    # md5sum assertion below.
    fs.remove(fake_file_paths[3])
    # Set Mocker
    mocker.patch("pathlib.Path.exists", my_exists)
    mocker.patch(
        "cubi_tk.snappy.itransfer_common.SnappyItransferCommandBase.get_sodar_info",
        my_get_sodar_info,
    )
    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_variant_calling.os", fake_os)
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)
    mocker.patch("cubi_tk.snappy.common.open", fake_open)
    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)
    # Actually exercise code and perform test.
    parser, _subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)
    assert not res
    # We do not care about call order but simply test call count and then assert that all files are there which would
    # be equivalent of comparing sets of files.
    # The removed MD5 file must have been recreated via a single md5sum call.
    assert fs.exists(fake_file_paths[3])
    assert mock_check_call.call_count == 1
    mock_check_call.assert_called_once_with(
        ["md5sum", "bwa.gatk_hc.index-N1-DNA1-WES1.vcf.gz"],
        cwd=os.path.dirname(fake_file_paths[3]),
        stdout=ANY,
    )
    # Three external commands per file: imkdir, irsync, ils.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        # Directory name is "<mapper>.<caller>.<index>"; rsplit keeps only the index.
        mapper_index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path, "variant_calling/output")).split("/", 1)
        _mapper, index = mapper_index.rsplit(".", 1)
        remote_path = os.path.join(dest_path, index, "variant_calling",
                                   args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            "i:%s" % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        # stderr=-2 is the numeric value of subprocess.STDOUT.
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
def test_run_archive_copy_smoke_test(mocker):
    """Smoke test for ``archive copy``: copy the prepared archive to its final
    destination, then compare reports, audits, and the copied file tree against the
    reference directory."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        repo_dir = os.path.join(os.path.dirname(__file__), "data", "archive")
        argv = [
            "archive",
            "copy",
            "--keep-workdir-hashdeep",
            os.path.join(repo_dir, "temp_dest_verif"),
            os.path.join(tmp_dir, "final_dest"),
        ]
        setup_argparse()
        # --- run tests
        res = main(argv)
        assert res == 0
        # --- remove timestamps on all hashdeep reports & audits
        now = datetime.date.today().strftime("%Y-%m-%d")
        prefix = os.path.join(tmp_dir, "final_dest")
        for fn in ["hashdeep_audit", "workdir_report", "workdir_audit"]:
            from_fn = "{}_{}.txt".format(now, fn)
            to_fn = "{}.txt".format(fn)
            os.rename(os.path.join(prefix, from_fn), os.path.join(prefix, to_fn))
        # --- check report
        # NOTE(review): the report bodies are parsed here but never compared
        # (no "assert repo_body == tmp_body" as in the prepare test) -- confirm
        # whether that assertion was dropped deliberately.
        (repo_titles, repo_body) = sort_hashdeep_title_and_body(
            os.path.join(repo_dir, "final_dest_verif", "workdir_report.txt"))
        (tmp_titles, tmp_body) = sort_hashdeep_title_and_body(
            os.path.join(tmp_dir, "final_dest", "workdir_report.txt"))
        # --- check audits (line order may differ, so compare sorted lines)
        for fn in ["hashdeep_audit", "workdir_audit"]:
            with open(os.path.join(repo_dir, "final_dest_verif", fn + ".txt"),
                      "r") as f:
                repo = sorted(f.readlines())
            with open(os.path.join(tmp_dir, "final_dest", fn + ".txt"), "r") as f:
                tmp = sorted(f.readlines())
            assert repo == tmp
        # --- test all copied files, except the hashdeep report & audit, that can differ by line order
        prefix = os.path.join(repo_dir, "final_dest_verif")
        ref_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        ref_fns = filter(lambda x: not IGNORE_FILES_PATTERN.match(x), ref_fns)
        prefix = os.path.join(tmp_dir, "final_dest")
        # NOTE(review): test_fns is computed but not used afterwards (cmpfiles only
        # receives ref_fns) -- possibly a leftover.
        test_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        test_fns = filter(lambda x: not IGNORE_FILES_PATTERN.match(x), test_fns)
        matches, mismatches, errors = filecmp.cmpfiles(
            os.path.join(repo_dir, "final_dest_verif"),
            os.path.join(tmp_dir, "final_dest"),
            common=ref_fns,
            shallow=False,
        )
        assert len(matches) > 0
        # Ignored entries are expected to show up as comparison errors.
        assert sorted(errors) == [
            "extra_data/to_ignored_dir", "extra_data/to_ignored_file"
        ]
        assert sorted(mismatches) == ["pipeline/output/sample2"]
        # The completion marker file must exist in the final destination.
        assert os.path.exists(
            os.path.join(tmp_dir, "final_dest", "archive_copy_complete"))
def test_run_seasnap_itransfer_results_smoke_test(mocker, fs):
    """Smoke test for ``sea-snap itransfer-results``: transfer files listed in a
    blueprint; iRODS commands are issued as shell strings on the mocked
    ``check_output``."""
    # --- setup arguments
    dest_path = "/irods/dest"
    fake_base_path = "/base/path"
    blueprint_path = os.path.join(os.path.dirname(__file__), "data",
                                  "test_blueprint.txt")
    argv = [
        "--verbose", "sea-snap", "itransfer-results", blueprint_path, dest_path
    ]
    parser, subparsers = setup_argparse()
    # --- add test files
    fake_file_paths = []
    for member in ("sample1", "sample2", "sample3"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/mapping/star/%s/out/star.%s-N1-RNA1-RNA-Seq1.bam%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/mapping/star/%s/report/star.%s-N1-RNA1-RNA-Seq1.log%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
    fs.add_real_file(blueprint_path)
    # Touch the blueprint so it is newer than the created files.
    Path(blueprint_path).touch()
    # Remove index's log MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])
    # --- mock modules
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.sea_snap.itransfer_results.check_output", mock_check_output)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)
    # necessary because independent test fail
    mock_value = mock.mock_open()
    mocker.patch("cubi_tk.sea_snap.itransfer_results.Value", mock_value)
    mocker.patch("cubi_tk.snappy.itransfer_common.Value", mock_value)
    # --- run tests
    res = main(argv)
    print(mock_check_output.call_args_list)
    assert not res
    # The removed MD5 file must have been recreated via a single md5sum call.
    assert fs.exists(fake_file_paths[3])
    assert mock_check_call.call_count == 1
    assert mock_check_call.call_args[0] == ([
        "md5sum", "star.sample1-N1-RNA1-RNA-Seq1.log"
    ], )
    # Two shell commands per file: imkdir and irsync (no ils here).
    assert mock_check_output.call_count == len(fake_file_paths) * 2
    remote_path = os.path.join(dest_path, "fakedest")
    for path in fake_file_paths:
        expected_mkdir_argv = f"imkdir -p $(dirname {remote_path} )"
        ext = ".md5" if path.split(".")[-1] == "md5" else ""
        expected_irsync_argv = f"irsync -a -K {path} {('i:%s' + ext) % remote_path}"
        # Commands are passed as single strings with shell=True.
        assert ((expected_mkdir_argv, ), {
            "shell": True
        }) in mock_check_output.call_args_list
        assert ((expected_irsync_argv, ), {
            "shell": True
        }) in mock_check_output.call_args_list
def test_run_sodar_ingest_fastq_smoke_test(mocker, requests_mock):
    """Smoke test for ``sodar ingest-fastq`` with a landing-zone UUID: the landing zone
    is resolved through a mocked SODAR REST endpoint and the iRODS command invocations
    are asserted on the mocked ``check_output``."""
    # --- setup arguments
    irods_path = "/irods/dest"
    landing_zone_uuid = "landing_zone_uuid"
    dest_path = "target/folder/generic_file.fq.gz"
    fake_base_path = "/base/path"
    argv = [
        "--verbose",
        "sodar",
        "ingest-fastq",
        "--num-parallel-transfers",
        "0",
        "--sodar-api-token",
        "XXXX",
        "--yes",
        "--remote-dir-pattern",
        dest_path,
        fake_base_path,
        landing_zone_uuid,
    ]
    parser, _subparsers = setup_argparse()
    args = parser.parse_args(argv)
    # Setup fake file system but only patch selected modules. We cannot use the Patcher approach here as this would
    # break biomedsheets.
    fs = fake_filesystem.FakeFilesystem()
    fake_os = fake_filesystem.FakeOsModule(fs)
    fake_pl = fake_pathlib.FakePathlibModule(fs)
    # --- add test files
    fake_file_paths = []
    for member in ("sample1", "sample2", "sample3"):
        for ext in ("", ".md5"):
            fake_file_paths.append("%s/%s/%s-N1-RNA1-RNA_seq1.fastq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append("%s/%s/%s-N1-DNA1-WES1.fq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
    # Remove index's log MD5 file again so it is recreated.
    # fake_file_paths[3] is sample1's .fq.gz.md5 (matches the md5sum assertion below).
    fs.remove(fake_file_paths[3])
    # --- mock modules
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.sea_snap.itransfer_results.pathlib", fake_pl)
    mocker.patch("cubi_tk.sea_snap.itransfer_results.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mock_check_output = mock.MagicMock(return_value=0)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    mock_check_call = mock.MagicMock(return_value=0)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)
    mocker.patch("cubi_tk.sodar.ingest_fastq.pathlib", fake_pl)
    mocker.patch("cubi_tk.sodar.ingest_fastq.os", fake_os)
    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)
    # necessary because independent test fail
    mock_value = mock.MagicMock()
    mocker.patch("cubi_tk.sodar.ingest_fastq.Value", mock_value)
    mocker.patch("cubi_tk.snappy.itransfer_common.Value", mock_value)
    # requests mock -- minimal landing-zone payload; only irods_path is meaningful here
    return_value = dict(
        assay="",
        config_data="",
        configuration="",
        date_modified="",
        description="",
        irods_path=irods_path,
        project="",
        sodar_uuid="",
        status="",
        status_info="",
        title="",
        user=dict(sodar_uuid="", username="", name="", email=""),
    )
    url = os.path.join(args.sodar_url, "landingzones", "api", "retrieve",
                       args.destination)
    requests_mock.register_uri("GET", url, text=json.dumps(return_value))
    # --- run tests
    res = main(argv)
    assert not res
    # TODO: make mock check_output actually create the file?
    # assert fs.exists(fake_file_paths[3])
    assert mock_check_call.call_count == 1
    assert mock_check_call.call_args[0] == ([
        "md5sum", "sample1-N1-DNA1-WES1.fq.gz"
    ], )
    # Three external commands per file: imkdir, irsync, ils.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    remote_path = os.path.join(irods_path, dest_path)
    for path in fake_file_paths:
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        ext = ".md5" if path.split(".")[-1] == "md5" else ""
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            ("i:%s" + ext) % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        assert ((expected_mkdir_argv, ), ) in mock_check_output.call_args_list
        assert ((expected_irsync_argv, ), ) in mock_check_output.call_args_list
        # stderr=-2 is the numeric value of subprocess.STDOUT.
        assert ((expected_ils_argv, ), {
            "stderr": -2
        }) in mock_check_output.call_args_list
def test_run_snappy_itransfer_ngs_mapping_smoke_test(mocker, germline_trio_sheet_tsv, minimal_config):
    """Smoke test for ``cubi-tk snappy itransfer-ngs-mapping``.

    Builds a fake trio project directory with pyfakefs, patches the ``os``
    module inside the modules under test as well as the iRODS subprocess
    helpers (``check_output``/``check_call``), runs ``main()``, and then
    asserts that the expected ``md5sum``/``imkdir``/``irsync``/``ils``
    invocations happened for every created file.
    """
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    sodar_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = [
        "--verbose",
        "snappy",
        "itransfer-ngs-mapping",
        "--base-path",
        fake_base_path,
        "--sodar-api-token",
        "XXXX",
        sodar_uuid,
    ]
    parser, subparsers = setup_argparse()
    # ``args`` is only needed below to read ``args.remote_dir_date`` when
    # reconstructing the expected remote paths.
    args = parser.parse_args(argv)
    # Setup fake file system but only patch selected modules.  We cannot use
    # the Patcher approach here as this would break both biomedsheets and
    # multiprocessing.
    fs = fake_filesystem.FakeFilesystem()
    fake_file_paths = []
    # For every trio member create the BAM and log outputs plus their ``.md5``
    # companions: 4 files per member, 12 in total.
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/out/%s-N1-DNA1-WES1.bam%s"
                % (fake_base_path, member, member, ext)
            )
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/log/bwa.%s-N1-DNA1-WES1.log%s"
                % (fake_base_path, member, member, ext)
            )
            fs.create_file(fake_file_paths[-1])
    # Create sample sheet in fake file system.
    sample_sheet_path = fake_base_path + "/.snappy_pipeline/sheet.tsv"
    fs.create_file(sample_sheet_path, contents=germline_trio_sheet_tsv, create_missing_dirs=True)
    # Create config in fake file system.
    config_path = fake_base_path + "/.snappy_pipeline/config.yaml"
    fs.create_file(config_path, contents=minimal_config, create_missing_dirs=True)
    # Print path to all created files (debugging aid when the test fails).
    print("\n".join(fake_file_paths + [sample_sheet_path, config_path]))
    # Remove index's log MD5 file again so it is recreated.
    # ``fake_file_paths[3]`` is the index member's ``.log.md5`` file (4th file
    # appended above).
    fs.remove(fake_file_paths[3])
    # Set up mocks: path existence and SODAR info come from the test helpers.
    mocker.patch("pathlib.Path.exists", my_exists)
    mocker.patch(
        "cubi_tk.snappy.itransfer_common.SnappyItransferCommandBase.get_sodar_info",
        my_get_sodar_info,
    )
    # Route all filesystem access in the modules under test through pyfakefs.
    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_ngs_mapping.os", fake_os)
    # Replace the subprocess helpers so no external iRODS commands run.
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)
    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)
    mocker.patch("cubi_tk.snappy.common.open", fake_open)
    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)
    # # requests mock
    # return_value = dict(assay="", config_data="", configuration="", date_modified="", description="", irods_path=sodar_path, project="", sodar_uuid="", status="", status_info="", title="", user="")
    # url_tpl = "%(sodar_url)s/landingzones/api/retrieve/%(landing_zone_uuid)s"
    # url = url_tpl % {"sodar_url": args.sodar_url, "landing_zone_uuid": args.landing_zone_uuid}
    # requests_mock.get(url, text=json.dumps(return_value))
    # #requests_mock.get("resource://biomedsheets//data/std_fields.json", text="dummy")
    # #requests_mock.get("resource://biomedsheets/data/std_fields.json#/extraInfoDefs/template/ncbiTaxon", text="dummy")
    # Actually exercise code and perform test.
    res = main(argv)
    assert not res
    # We do not care about call order but simply test call count and then
    # assert that all files are there, which is equivalent to comparing sets
    # of files.
    # The removed log MD5 file must have been recreated ...
    assert fs.exists(fake_file_paths[3])
    # ... by exactly one ``md5sum`` invocation in the log directory.
    assert mock_check_call.call_count == 1
    mock_check_call.assert_called_once_with(
        ["md5sum", "bwa.index-N1-DNA1-WES1.log"],
        cwd=os.path.dirname(fake_file_paths[3]),
        stdout=ANY,
    )
    # Three iRODS commands (imkdir, irsync, ils) per transferred file.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        # Recover "<mapper>.<index>" and the path below it from the fake path,
        # e.g. "bwa.index-N1-DNA1-WES1" / "out/index-N1-DNA1-WES1.bam".
        mapper_index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path, "ngs_mapping/output")
        ).split("/", 1)
        _mapper, index = mapper_index.rsplit(".", 1)
        remote_path = os.path.join(
            dest_path, index, "ngs_mapping", args.remote_dir_date, rel_path
        )
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = ["irsync", "-a", "-K", path, "i:%s" % remote_path]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        # -2 is the value of subprocess.STDOUT (stderr merged into stdout).
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)