def test_metadata_for_library_file(self):
        """
        Check the results reported for a text file carrying library-cram-like metadata.

        The file tested has metadata just like a library cram, except for some fields that are not
        used within metacheck anyway. It is a txt file, so it will have no header metadata.

        Asserts that exactly the mandatory checks are reported, that the comparison checks were not
        executed, and that the listed group/study/sample checks were executed and failed.
        """
        irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_metadata.txt"

        result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath])
        print("Comparisong checks: %s" % self.comparison_checks)
        for _, outcomes in result.items():
            reported_names = {outcome.check_name for outcome in outcomes}
            self.assertSetEqual(
                reported_names,
                set(CHECK_NAMES.get_only_mandatory_check_names()))
            for outcome in outcomes:
                name = outcome.check_name
                if name in self.comparison_checks:
                    self.assertFalse(outcome.executed)
                    continue
                if name in (
                        CHECK_NAMES.check_ss_irods_group_read_permission,
                        CHECK_NAMES.check_there_is_ss_irods_group,
                        CHECK_NAMES.check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples,
                        CHECK_NAMES.check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape,
                ):
                    self.assertTrue(outcome.executed)
                    self.assertEqual(outcome.result, RESULT.FAILURE)
Esempio n. 2
0
 def test_metadata_when_metadata_ok_with_wrong_reference(self):
     """Expect the desired-reference check to fail when 'hs37d5' is requested for this file."""
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_ok_metadata.out"
     result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath], reference='hs37d5')
     for _, outcomes in result.items():
         # Only the desired-reference check is inspected; every other check is ignored here.
         reference_outcomes = (
             outcome for outcome in outcomes
             if outcome.check_name == CHECK_NAMES.check_desired_reference
         )
         for outcome in reference_outcomes:
             self.assertEqual(outcome.result, RESULT.FAILURE)
 def test_metadata_when_metadata_ok_with_wrong_reference(self):
     """Request reference 'hs37d5' for the file and expect the desired-reference check to fail."""
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_ok_metadata.out"
     result = api.check_metadata_fetched_by_path(
         irods_fpaths=[irods_fpath],
         reference='hs37d5',
     )
     for _, outcomes in result.items():
         for outcome in outcomes:
             # Skip everything except the desired-reference check.
             if outcome.check_name != CHECK_NAMES.check_desired_reference:
                 continue
             self.assertEqual(outcome.result, RESULT.FAILURE)
Esempio n. 4
0
    def test_fetch_study_metadata_vs_stream_study_metadata(self, stdin):
        """
        Compare check results for metadata given as a JSON stream against metadata fetched by path.

        The contents of a local JSON file are installed as ``stdin.return_value``; both API entry
        points are then called and must report the same file paths and, per path, the same set of
        results.

        :param stdin: presumably a mock injected by a patch decorator that is not visible in this
            snippet -- TODO confirm against the enclosing class.
        """
        json_fpath = "/nfs/users/nfs_i/ic4/Projects/python3/meta-check/16006_5.json"
        # Use a context manager so the file handle is closed deterministically.
        with open(json_fpath) as json_file:
            stdin.return_value = json_file.read()
        result_fetch_by_metadata = api.check_metadata_fetched_by_path(irods_fpaths=['/seq/16006/16006_5.cram'])
        result_stream_metadata = api.check_metadata_given_as_json_stream()

        self.assertSetEqual(set(result_stream_metadata.keys()), set(result_fetch_by_metadata.keys()))
        print()
        # A distinct loop name avoids shadowing the JSON path used above.
        for irods_fpath, results in result_fetch_by_metadata.items():
            self.assertSetEqual(set(results), set(result_stream_metadata[irods_fpath]))
Esempio n. 5
0
 def test_metadata_when_metadata_ok_with_wrong_reference_and_one_replica(self):
     """
     Check a file with reference 'grch38' requested.

     Asserts that the desired-reference check was not executed and that the replica-count and
     ss-irods-group checks failed.
     """
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_metadata_missing_md5.out"
     result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath], reference='grch38')
     for _, outcomes in result.items():
         for outcome in outcomes:
             name = outcome.check_name
             if name == CHECK_NAMES.check_desired_reference:
                 self.assertEqual(outcome.executed, False)
                 continue
             if name in (
                     CHECK_NAMES.check_more_than_one_replica,
                     CHECK_NAMES.check_ss_irods_group_read_permission,
                     CHECK_NAMES.check_there_is_ss_irods_group,
             ):
                 self.assertEqual(outcome.result, RESULT.FAILURE)
    def test_fetch_study_metadata_vs_stream_study_metadata(self, stdin):
        """
        Verify that streamed JSON metadata and path-fetched metadata yield the same check results.

        The contents of a local JSON file are installed as ``stdin.return_value``; both API entry
        points are then called and must report the same file paths and, per path, the same set of
        results.

        :param stdin: presumably a mock injected by a patch decorator that is not visible in this
            snippet -- TODO confirm against the enclosing class.
        """
        json_fpath = "/nfs/users/nfs_i/ic4/Projects/python3/meta-check/16006_5.json"
        # Use a context manager so the file handle is closed deterministically.
        with open(json_fpath) as json_file:
            stdin.return_value = json_file.read()
        result_fetch_by_metadata = api.check_metadata_fetched_by_path(
            irods_fpaths=['/seq/16006/16006_5.cram'])
        result_stream_metadata = api.check_metadata_given_as_json_stream()

        self.assertSetEqual(set(result_stream_metadata.keys()),
                            set(result_fetch_by_metadata.keys()))
        print()
        # A distinct loop name avoids shadowing the JSON path used above.
        for irods_fpath, results in result_fetch_by_metadata.items():
            self.assertSetEqual(set(results),
                                set(result_stream_metadata[irods_fpath]))
 def test_metadata_when_metadata_ok_with_wrong_reference_and_one_replica(self):
     """
     Run the checks with reference 'grch38' requested.

     The desired-reference check must not have been executed; the replica-count check and both
     ss-irods-group checks must have failed.
     """
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_metadata_missing_md5.out"
     result = api.check_metadata_fetched_by_path(
         irods_fpaths=[irods_fpath],
         reference='grch38',
     )
     for _, outcomes in result.items():
         for outcome in outcomes:
             if outcome.check_name == CHECK_NAMES.check_desired_reference:
                 self.assertEqual(outcome.executed, False)
             elif outcome.check_name in (
                     CHECK_NAMES.check_more_than_one_replica,
                     CHECK_NAMES.check_ss_irods_group_read_permission,
                     CHECK_NAMES.check_there_is_ss_irods_group,
             ):
                 self.assertEqual(outcome.result, RESULT.FAILURE)
Esempio n. 8
0
 def test_metadata_when_study_and_samples_dont_match(self):
     """
     Check a cram whose samples were given the wrong study.

     The listed study/sample/group/attribute-count checks must fail; every other reported check
     must succeed.
     """
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_samples_given_wrong_study.cram"
     result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath], reference='grch38')
     for _, outcomes in result.items():
         for outcome in outcomes:
             should_fail = outcome.check_name in (
                 CHECK_NAMES.check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples,
                 CHECK_NAMES.check_for_samples_in_more_studies,
                 CHECK_NAMES.check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape,
                 CHECK_NAMES.check_there_is_ss_irods_group,
                 CHECK_NAMES.check_ss_irods_group_read_permission,
                 CHECK_NAMES.check_attribute_count,
             )
             if not should_fail:
                 self.assertEqual(outcome.result, RESULT.SUCCESS)
             else:
                 self.assertEqual(outcome.result, RESULT.FAILURE)
Esempio n. 9
0
    def test_metadata_when_metadata_ok(self):
        """
        Check a file whose metadata is expected to be fine.

        Exactly the mandatory checks must be reported. Comparison checks must not have been
        executed, the two ss-irods-group checks and the samples-in-more-studies check must fail,
        and all remaining checks must succeed.
        """
        irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_ok_metadata.out"
        result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath])
        for _, outcomes in result.items():
            reported_names = {outcome.check_name for outcome in outcomes}
            self.assertSetEqual(reported_names, set(CHECK_NAMES.get_only_mandatory_check_names()))
            for outcome in outcomes:
                name = outcome.check_name
                if name in self.comparison_checks:
                    self.assertFalse(outcome.executed)
                    continue
                if name in (
                    CHECK_NAMES.check_there_is_ss_irods_group,
                    CHECK_NAMES.check_ss_irods_group_read_permission,
                    CHECK_NAMES.check_for_samples_in_more_studies,
                ):
                    self.assertEqual(RESULT.FAILURE, outcome.result)
                else:
                    self.assertEqual(RESULT.SUCCESS, outcome.result)
Esempio n. 10
0
 def test_metadata_when_header_doesnt_match_irods(self):
     """
     Check a cram whose header does not match its iRODS metadata.

     The listed group, header/iRODS/seqscape id-comparison, samples-in-more-studies and
     attribute-count checks must fail; every other reported check must succeed.
     """
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_wrong_header.cram"
     result = api.check_metadata_fetched_by_path(
         irods_fpaths=[irods_fpath],
         reference='grch38',
     )
     for _, outcomes in result.items():
         for outcome in outcomes:
             should_fail = outcome.check_name in (
                 CHECK_NAMES.check_there_is_ss_irods_group,
                 CHECK_NAMES.check_ss_irods_group_read_permission,
                 CHECK_NAMES.check_irods_ids_compared_to_header_ids,
                 CHECK_NAMES.check_header_ids_compared_to_irods_ids,
                 CHECK_NAMES.check_header_ids_compared_to_seqscape_ids,
                 CHECK_NAMES.check_seqscape_ids_compared_to_header_ids,
                 CHECK_NAMES.check_for_samples_in_more_studies,
                 CHECK_NAMES.check_attribute_count,
             )
             if not should_fail:
                 self.assertEqual(outcome.result, RESULT.SUCCESS)
             else:
                 self.assertEqual(outcome.result, RESULT.FAILURE)
Esempio n. 11
0
    def test_same_check_results_by_path_and_by_metadata(self):
        """
        Verify that fetching by metadata and fetching by path give the same results for one file.

        Every check result obtained via the metadata query must equal the result with the same
        check name obtained via the path query, and both result lists must have the same length.
        """
        fpath = '/humgen/projects/serapis_staging/test-metacheck/test_metadata_comparison.cram'
        check_results_by_metadata = api.check_metadata_fetched_by_metadata(reference='GRCh38', study_name='GDAP_XTEN', irods_zone='humgen')
        check_results_by_path = api.check_metadata_fetched_by_path(irods_fpaths=[fpath], reference='GRCh38')

        results_via_meta = check_results_by_metadata[fpath]
        results_via_path = check_results_by_path[fpath]

        for meta_result in results_via_meta:
            # First path-based result with the same check name, or None when there is no match.
            path_result = next(
                (candidate for candidate in results_via_path
                 if candidate.check_name == meta_result.check_name),
                None,
            )
            self.assertEqual(meta_result, path_result)

        self.assertEqual(len(results_via_path), len(results_via_meta))
Esempio n. 12
0
 def test_metadata_when_metadata_ok(self):
     """
     Run the default checks on a file whose metadata is expected to be fine.

     Exactly the mandatory checks must be reported. Comparison checks must not have been executed,
     the two ss-irods-group checks and the samples-in-more-studies check must fail, and all
     remaining checks must succeed.
     """
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_ok_metadata.out"
     result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath])
     for _, outcomes in result.items():
         self.assertSetEqual(
             {outcome.check_name for outcome in outcomes},
             set(CHECK_NAMES.get_only_mandatory_check_names()),
         )
         for outcome in outcomes:
             if outcome.check_name in self.comparison_checks:
                 self.assertFalse(outcome.executed)
             elif outcome.check_name in (
                     CHECK_NAMES.check_there_is_ss_irods_group,
                     CHECK_NAMES.check_ss_irods_group_read_permission,
                     CHECK_NAMES.check_for_samples_in_more_studies,
             ):
                 self.assertEqual(RESULT.FAILURE, outcome.result)
             else:
                 self.assertEqual(RESULT.SUCCESS, outcome.result)
Esempio n. 13
0
 def test_when_md5_is_wrong(self):
     """
     Check a file with a wrong md5.

     The listed group, samples-in-more-studies, checksum, replica and attribute-count checks must
     fail. Any other check must succeed if it was executed, and carry no result otherwise.
     """
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_wrong_md5.out"
     result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath], reference='grch38')
     for _, outcomes in result.items():
         for outcome in outcomes:
             if outcome.check_name in (
                 CHECK_NAMES.check_there_is_ss_irods_group,
                 CHECK_NAMES.check_ss_irods_group_read_permission,
                 CHECK_NAMES.check_for_samples_in_more_studies,
                 CHECK_NAMES.check_replica_checksum_valid,
                 CHECK_NAMES.check_more_than_one_replica,
                 CHECK_NAMES.check_by_comparison_checksum_in_meta_with_checksum_at_upload,
                 CHECK_NAMES.check_attribute_count,
             ):
                 self.assertEqual(outcome.result, RESULT.FAILURE)
             elif outcome.executed:
                 self.assertEqual(outcome.result, RESULT.SUCCESS)
             else:
                 self.assertIsNone(outcome.result)
Esempio n. 14
0
 def test_when_md5_is_wrong(self):
     """
     Run the checks on a file with a wrong md5, requesting reference 'grch38'.

     The listed group, samples-in-more-studies, checksum, replica and attribute-count checks must
     fail. Any other check must succeed if it was executed, and carry no result otherwise.
     """
     irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_wrong_md5.out"
     result = api.check_metadata_fetched_by_path(
         irods_fpaths=[irods_fpath],
         reference='grch38',
     )
     for _, outcomes in result.items():
         for outcome in outcomes:
             if outcome.check_name in (
                     CHECK_NAMES.check_there_is_ss_irods_group,
                     CHECK_NAMES.check_ss_irods_group_read_permission,
                     CHECK_NAMES.check_for_samples_in_more_studies,
                     CHECK_NAMES.check_replica_checksum_valid,
                     CHECK_NAMES.check_more_than_one_replica,
                     CHECK_NAMES.check_by_comparison_checksum_in_meta_with_checksum_at_upload,
                     CHECK_NAMES.check_attribute_count,
             ):
                 self.assertEqual(outcome.result, RESULT.FAILURE)
                 continue
             # Checks outside the expected-failure list: succeeded if run, no result if skipped.
             if outcome.executed:
                 self.assertEqual(outcome.result, RESULT.SUCCESS)
             else:
                 self.assertIsNone(outcome.result)
Esempio n. 15
0
    def test_metadata_for_library_file(self):
        """
        Check the results reported for a text file carrying library-cram-like metadata.

        The file tested has metadata just like a library cram, except for some fields that are not
        used within metacheck anyway. It is a txt file, so it will have no header metadata.

        Asserts that exactly the mandatory checks are reported, that the comparison checks were not
        executed, and that the listed group/study/sample checks were executed and failed.
        """
        irods_fpath = "/humgen/projects/serapis_staging/test-metacheck/test_metadata.txt"

        result = api.check_metadata_fetched_by_path(irods_fpaths=[irods_fpath])
        print("Comparisong checks: %s" % self.comparison_checks)
        for _, outcomes in result.items():
            self.assertSetEqual(
                {outcome.check_name for outcome in outcomes},
                set(CHECK_NAMES.get_only_mandatory_check_names()),
            )
            for outcome in outcomes:
                if outcome.check_name in self.comparison_checks:
                    self.assertFalse(outcome.executed)
                elif outcome.check_name in (
                    CHECK_NAMES.check_ss_irods_group_read_permission,
                    CHECK_NAMES.check_there_is_ss_irods_group,
                    CHECK_NAMES.check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples,
                    CHECK_NAMES.check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape,
                ):
                    self.assertTrue(outcome.executed)
                    self.assertEqual(outcome.result, RESULT.FAILURE)
Esempio n. 16
0
    def test_same_check_results_by_path_and_by_metadata(self):
        """
        Compare the results of the metadata-query strategy with the by-path strategy for one file.

        Every check result obtained via the metadata query must equal the result with the same
        check name obtained via the path query, and both result lists must have the same length.
        """
        fpath = '/humgen/projects/serapis_staging/test-metacheck/test_metadata_comparison.cram'
        check_results_by_metadata = api.check_metadata_fetched_by_metadata(
            reference='GRCh38', study_name='GDAP_XTEN', irods_zone='humgen')
        check_results_by_path = api.check_metadata_fetched_by_path(
            irods_fpaths=[fpath], reference='GRCh38')

        results_via_meta = check_results_by_metadata[fpath]
        results_via_path = check_results_by_path[fpath]

        def first_with_name(candidates, wanted_name):
            # First candidate carrying the wanted check name; None when nothing matches.
            matching = (c for c in candidates if c.check_name == wanted_name)
            return next(matching, None)

        for meta_result in results_via_meta:
            path_result = first_with_name(results_via_path, meta_result.check_name)
            self.assertEqual(meta_result, path_result)

        self.assertEqual(len(results_via_path), len(results_via_meta))
Esempio n. 17
0
def main():
    """
    Command-line entry point: run the metadata checks and print a report.

    Parses the arguments with ``arg_parser``, runs the checks according to
    ``args.metadata_fetching_strategy`` ('fetch_by_metadata', 'fetch_by_path' or 'given_at_stdin'),
    prints the results as JSON when ``args.json_output`` is set and as TSV otherwise, then exits
    the process with the status computed by ``decide_exit_status``.

    Options missing from the parsed namespace are treated as ``None``.

    :raises ValueError: if the fetching strategy is not one of the supported values.
    :raises SystemExit: always, on normal completion, carrying the computed exit status.
    """
    # Local import keeps this block self-contained; sys.exit() is used because the bare exit()
    # helper is installed by the site module and is not guaranteed to exist in every runtime.
    import sys

    args = arg_parser.parse_args()

    # getattr with a default replaces the per-option try/except AttributeError blocks:
    # an option absent from the namespace simply becomes None.
    filter_npg_qc = getattr(args, 'filter_npg_qc', None)
    filter_target = getattr(args, 'filter_target', None)
    file_types = getattr(args, 'file_types', None)
    study_name = getattr(args, 'study_name', None)
    study_acc_nr = getattr(args, 'study_acc_nr', None)
    study_internal_id = getattr(args, 'study_internal_id', None)
    irods_fpaths = getattr(args, 'irods_fpaths', None)
    irods_zone = getattr(args, 'irods_zone', None)
    reference = getattr(args, 'desired_reference', None)

    strategy = args.metadata_fetching_strategy
    if strategy == 'fetch_by_metadata':
        # Warn about missing filters before running the (potentially broad) metadata query.
        if not file_types:
            print(
                "WARNING! You haven't filtered on file type. The result will contain both BAMs and CRAMs, possibly other types of file as well.")
        if not filter_target:
            print(
                "WARNING! You haven't filtered by target field. You will get back the report from checking all the data, "
                "no matter if it is the target or not, hence possibly also PhiX")
        if not filter_npg_qc:
            print(
                "WARNING! You haven't filtered on manual_qc field. You will get the report from checking all the data, "
                "no matter if qc pass of fail.")
        check_results_by_fpath = check_metadata_fetched_by_metadata(filter_npg_qc, filter_target, file_types,
                                                                    study_name, study_acc_nr, study_internal_id,
                                                                    irods_zone, reference)
    elif strategy == 'fetch_by_path':
        check_results_by_fpath = check_metadata_fetched_by_path(irods_fpaths, reference)
    elif strategy == 'given_at_stdin':
        check_results_by_fpath = check_metadata_given_as_json_stream(reference)
    else:
        raise ValueError("Fetching strategy not supported")

    if args.json_output:
        print(format_output_as_json(check_results_by_fpath))
    else:
        print(format_output_as_tsv(check_results_by_fpath))

    sys.exit(decide_exit_status(check_results_by_fpath))