Ejemplo n.º 1
0
    def validate_fields(self) -> List:
        """Run all field-level checks for this file and return their CheckResults."""
        results = []
        upl_check = self.check_checksum_at_upload_present()
        results.append(upl_check)

        meta_check = self.check_checksum_in_meta_present()
        results.append(meta_check)

        # The comparison check only makes sense when both checksums were found.
        comparison = CheckResult(
            check_name=CHECK_NAMES.
            check_by_comparison_checksum_in_meta_with_checksum_at_upload)
        both_present = (upl_check.result == RESULT.SUCCESS
                        and meta_check.result == RESULT.SUCCESS)
        if both_present:
            if self.checksum_in_meta != self.checksum_at_upload:
                comparison.result = RESULT.FAILURE
                comparison.error_message = "The checksum in metadata = %s different than checksum at upload = %s" % \
                                           (self.checksum_in_meta, self.checksum_at_upload)
            else:
                comparison.result = RESULT.SUCCESS
        else:
            # One of the inputs is missing: mark the comparison as not executed.
            comparison.executed = False
            comparison.result = None
        results.append(comparison)

        results.append(self.check_npg_qc_field())
        results.append(self.check_target_field())
        return results
Ejemplo n.º 2
0
 def check_more_than_one_replicas(cls, replicas) -> CheckResult:
     """Warn (WARNING severity) when a file has one replica or none at all."""
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_more_than_one_replica,
         severity=SEVERITY.WARNING)
     replica_count = len(replicas)
     if replica_count > 1:
         # Enough replicas: return the untouched (passing) result.
         return outcome
     outcome.executed = True
     outcome.result = RESULT.FAILURE
     outcome.error_message = "File has " + str(replica_count) + " replicas"
     return outcome
Ejemplo n.º 3
0
 def checksum_comparison_check(self):
     """Compare the checksum stored in metadata with the one computed at upload.

     Returns a CheckResult.  error_message starts out as a list that collects
     missing-input notes; on an actual mismatch it is overwritten with a plain
     string.  NOTE(review): that list-vs-str type inconsistency looks
     accidental — confirm which type consumers of error_message expect.
     """
     check_result = CheckResult(
         check_name=CHECK_NAMES.
         check_by_comparison_checksum_in_meta_with_checksum_at_upload,
         error_message=[])
     # The comparison can only run when both checksums are available.
     impossible_to_exec = False
     if not self.checksum_at_upload:
         check_result.executed = False
         check_result.error_message.append("Missing ichecksum result.")
         impossible_to_exec = True
     if not self.checksum_in_meta:
         check_result.executed = False
         check_result.error_message.append("Missing checksum from metadata")
         impossible_to_exec = True
     if not impossible_to_exec and self.checksum_in_meta != self.checksum_at_upload:
         check_result.result = RESULT.FAILURE
         # Overwrites the list above with a string (see NOTE in docstring).
         check_result.error_message = "The checksum in metadata = %s different than checksum at upload = %s" % \
                                      (self.checksum_in_meta, self.checksum_at_upload)
     return check_result
Ejemplo n.º 4
0
 def check_reference(self, desired_ref_name: str) -> CheckResult:
     """Check that every reference in the metadata matches the desired one.

     :param desired_ref_name: name (or substring) of the expected reference
     :return: a single CheckResult (the original annotation said
              List[CheckResult], which did not match the actual return value)
     """
     check_result = CheckResult(
         check_name=CHECK_NAMES.check_desired_reference)
     check_result.error_message = []
     if not self.get_references():
         check_result.result = None
         check_result.executed = False
         check_result.error_message.append(
             "Missing reference from the metadata")
     if not desired_ref_name:
         check_result.result = None
         check_result.executed = False
         check_result.error_message.append(
             "Missing desired reference parameter")
     if not check_result.error_message:
         # Both inputs present: each metadata reference must contain the
         # desired name (case-insensitive substring match).
         for ref in self.get_references():
             if desired_ref_name.lower() not in ref.lower():
                 check_result.result = RESULT.FAILURE
                 # Typo fixed in the original message ("thant" -> "than").
                 check_result.error_message = "The desired reference is: %s is different than the metadata reference: %s" % (
                     desired_ref_name, ref)
     return check_result
Ejemplo n.º 5
0
    def check_studies_fetched_by_samples(self):
        """Cross-check the studies recorded in iRODS against the studies
        Seqscape associates with this file's samples.

        :return: list containing a single CheckResult (kept as a list for
                 symmetry with the other multi-check methods).
        """
        check_results = []
        same_study_for_samples_check = CheckResult(
            check_name=CHECK_NAMES.
            check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples)
        if not self.get_entities_by_type('sample'):
            # No sample metadata -> the check cannot be executed.
            same_study_for_samples_check.executed = False
            same_study_for_samples_check.result = None
            check_results.append(same_study_for_samples_check)
            return check_results
        studies_by_samples_set = set(
            self.get_all_entities_by_association_by_type('sample', 'study'))
        studies_set = set(self.get_entities_by_type('study'))

        studies_set_names = [study.name for study in studies_set]
        studies_by_samples_set_names = [
            study.name for study in studies_by_samples_set
        ]

        sample_set_ids = [(sample.name, sample.accession_number)
                          for sample in self.get_entities_by_type('sample')]
        if not studies_set.issubset(studies_by_samples_set):
            error_msg = "For the %s given seqscape samples, the studies in iRODS: %s and the studies in Seqscape DISAGREE: %s" % (
                str(len(sample_set_ids)), studies_set_names,
                studies_by_samples_set_names)
            same_study_for_samples_check.result = RESULT.FAILURE
            same_study_for_samples_check.error_message = error_msg
        # The original else-branch computed studies_set.difference(
        # studies_by_samples_set) and reported a failure when non-empty, but
        # in the else-branch studies_set IS a subset of studies_by_samples_set,
        # so that difference is always empty and the branch could never fire;
        # it has been removed as dead code (behavior is unchanged).
        check_results.append(same_study_for_samples_check)
        return check_results
Ejemplo n.º 6
0
 def check_samples_fetched_by_studies(self):
     """Check that the samples attached to this file in iRODS all appear in
     Sequencescape under the file's study (or studies)."""
     outcome = CheckResult(check_name=CHECK_NAMES.check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape)
     studies = self.get_entities_by_type('study')
     if not studies:
         # Without a study there is nothing to query Sequencescape with.
         outcome.executed = False
         outcome.result = None
         return outcome
     expected_samples = set(self.get_all_entities_by_association_by_type('study', 'sample'))
     actual_samples = set(self.get_entities_by_type('sample'))
     unexpected = actual_samples.difference(expected_samples)
     if unexpected:
         outcome.error_message = "Some samples don't appear under study(s): %s in Sequencescape, " \
                                 "but they appear under this study in iRODS. Number of samples: %s, " \
                                 "and ids: %s" % ([study.name for study in studies],
                                                  str(len(unexpected)),
                                                  [(s.name, s.accession_number) for s in unexpected])
         outcome.result = RESULT.FAILURE
     return outcome
Ejemplo n.º 7
0
    def check_studies_fetched_by_samples(self):
        """Cross-check the studies recorded in iRODS against the studies
        Seqscape links to this file's samples.

        :return: list containing a single CheckResult.
        """
        check_results = []
        same_study_for_samples_check = CheckResult(check_name=CHECK_NAMES.check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples)
        if not self.get_entities_by_type('sample'):
            # No sample metadata -> the check cannot be executed.
            same_study_for_samples_check.executed = False
            same_study_for_samples_check.result = None
            check_results.append(same_study_for_samples_check)
            return check_results
        studies_by_samples_set = set(self.get_all_entities_by_association_by_type('sample', 'study'))
        studies_set = set(self.get_entities_by_type('study'))

        studies_set_names = [study.name for study in studies_set]
        studies_by_samples_set_names = [study.name for study in studies_by_samples_set]

        sample_set_ids = [(sample.name, sample.accession_number) for sample in self.get_entities_by_type('sample')]
        if not studies_set.issubset(studies_by_samples_set):
            error_msg = "For the %s given seqscape samples, the studies in iRODS: %s and the studies in Seqscape DISAGREE: %s" % (str(len(sample_set_ids)), studies_set_names, studies_by_samples_set_names)
            same_study_for_samples_check.result = RESULT.FAILURE
            same_study_for_samples_check.error_message = error_msg
        # The original else-branch tested studies_set.difference(
        # studies_by_samples_set); when studies_set is a subset of
        # studies_by_samples_set that difference is always empty, so the
        # branch could never report a failure — removed as dead code
        # (behavior is unchanged).
        check_results.append(same_study_for_samples_check)
        return check_results
Ejemplo n.º 8
0
 def check_non_public_acls(cls, acls) -> CheckResult:
     """
     Checks that the iRODS object doesn't have associated an ACL giving public access to users to it.
     :param acls: iterable of ACL objects exposing provides_public_access()
     :return: a single CheckResult (the original annotation said
              List[CheckResult], which did not match the actual return value)
     """
     check_result = CheckResult(
         check_name=CHECK_NAMES.check_no_public_acl,
         severity=SEVERITY.WARNING)
     if not acls:
         check_result.result = None
         check_result.executed = False
         check_result.error_message = "There are no ACLs."
         return check_result
     for acl in acls:
         if acl.provides_public_access():
             # Report the first public ACL found and stop scanning.
             # (Dropped the original's stray chained assignment to an
             # unused local also named error_message.)
             check_result.error_message = "The following ACL was found: " + str(acl)
             check_result.result = RESULT.FAILURE
             break
     return check_result
Ejemplo n.º 9
0
 def check_all_replicas_have_same_checksum(cls, replicas) -> CheckResult:
     """Verify that every replica reports the same checksum as the first one.

     :param replicas: sequence of replica objects with a .checksum attribute
     :return: CheckResult with IMPORTANT severity; FAILURE on any mismatch,
              not executed when there are no replicas.
     """
     result = CheckResult(
         check_name=CHECK_NAMES.check_all_replicas_same_checksum,
         severity=SEVERITY.IMPORTANT)
     if not replicas:
         # Nothing to compare against.
         # NOTE(review): error_message is a list here but a plain string in
         # the mismatch path below — confirm which type consumers expect.
         result.executed = False
         result.error_message = ["No replicas to compare with."]
         result.result = None
         return result
     first_replica = replicas[0]
     error_message = ''
     for replica in replicas:
         # `!=` replaces the original's `not ... ==` for readability;
         # the first replica trivially equals itself and never appends.
         if replica.checksum != first_replica.checksum:
             result.result = RESULT.FAILURE
             error_message += ("Replica: " + str(replica) +
                               " has different checksum than replica: " +
                               str(first_replica))
     if error_message:
         result.error_message = error_message
     return result
Ejemplo n.º 10
0
 def check_samples_fetched_by_studies(self):
     """Check that the samples attached to this file in iRODS are the same
     samples Sequencescape returns when queried by the file's study.

     :return: a single CheckResult; marked not executed when the metadata
              carries no study.
     """
     check_result = CheckResult(
         check_name=CHECK_NAMES.
         check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape
     )
     if not self.get_entities_by_type('study'):
         # Without a study there is nothing to query Sequencescape with.
         check_result.executed = False
         check_result.result = None
         return check_result
     # Samples Sequencescape associates with the study vs samples in iRODS.
     samples_by_studies_set = set(
         self.get_all_entities_by_association_by_type('study', 'sample'))
     samples_set = set(self.get_entities_by_type('sample'))
     if not samples_set.issubset(samples_by_studies_set):
         # Some iRODS samples are not registered under this study.
         diff_samples_wrong_study = samples_set.difference(
             samples_by_studies_set)
         error_msg = "Some samples don't appear under study(s): %s in Sequencescape, " \
                     "but they appear under this study in iRODS. Number of samples: %s, " \
                     "and ids: %s" % ([study.name for study in self.get_entities_by_type('study')],
                                      str(len(diff_samples_wrong_study)),
                                      [(s.name, s.accession_number) for s in diff_samples_wrong_study])
         check_result.error_message = error_msg
         check_result.result = RESULT.FAILURE
     return check_result
Ejemplo n.º 11
0
    def check_metadata_across_different_sources(irods_metadata_dict,
                                                header_metadata_dict,
                                                seqsc_metadata_dict,
                                                issues_dict):
        """
        This function checks the metadata from 3 different sources in terms of samples, libraries and studies.
        At the moment the checks across these sources consist of comparing: libraries, studies and samples
        As a result it updates the issues_dict by appending the CheckResults obtain after running the latest tests.
        :param irods_metadata_dict: key: fpath, value: irods_metadata for that file
        :param header_metadata_dict: key: fpath, value: header_metadata for that file
        :param seqsc_metadata_dict: key: fpath, value: seqscape_metadata for that file
        :param issues_dict: key: fpath, value: list of CheckResults
        :return: None; four CheckResults per fpath are appended to issues_dict
        """
        for fpath, irods_metadata in irods_metadata_dict.items():
            # NOTE(review): .get() returns None when fpath is missing from
            # these dicts, which would raise AttributeError at
            # header_metadata.has_metadata() below — confirm callers
            # guarantee all three dicts share the same keys.
            header_metadata = header_metadata_dict.get(fpath)
            seqscape_metadata = seqsc_metadata_dict.get(fpath)

            # Four pairwise comparisons, each direction reported separately.
            ss_vs_h_check_result = CheckResult(
                check_name=CHECK_NAMES.
                check_seqscape_ids_compared_to_header_ids,
                error_message=[])
            h_vs_ss_check_result = CheckResult(
                check_name=CHECK_NAMES.
                check_header_ids_compared_to_seqscape_ids,
                error_message=[])
            i_vs_h_check_result = CheckResult(
                check_name=CHECK_NAMES.check_irods_ids_compared_to_header_ids,
                error_message=[])
            h_vs_i_check_result = CheckResult(
                check_name=CHECK_NAMES.check_header_ids_compared_to_irods_ids,
                error_message=[])
            if not header_metadata.has_metadata():
                # Header metadata is the common reference for all four checks,
                # so none of them can run without it.
                error_msg = "No header metadata"
                ss_vs_h_check_result.executed = False
                h_vs_ss_check_result.executed = False
                i_vs_h_check_result.executed = False
                h_vs_i_check_result.executed = False

                i_vs_h_check_result.result = None
                h_vs_i_check_result.result = None
                h_vs_ss_check_result.result = None
                ss_vs_h_check_result.result = None

                ss_vs_h_check_result.error_message.append(error_msg)
                h_vs_ss_check_result.error_message.append(error_msg)
                i_vs_h_check_result.error_message.append(error_msg)
                h_vs_i_check_result.error_message.append(error_msg)
            else:
                if not seqscape_metadata.has_metadata():
                    # Only the two seqscape-related checks are blocked here.
                    error_msg = "No seqscape metadata"
                    ss_vs_h_check_result.executed = False
                    h_vs_ss_check_result.executed = False
                    ss_vs_h_check_result.result = None
                    h_vs_ss_check_result.result = None
                    ss_vs_h_check_result.error_message.append(error_msg)
                    h_vs_ss_check_result.error_message.append(error_msg)
                else:
                    # Symmetric difference, reported one direction at a time.
                    seqscape_diff_header = seqscape_metadata.difference(
                        header_metadata)
                    header_diff_seqscape = header_metadata.difference(
                        seqscape_metadata)
                    if seqscape_diff_header:
                        error_msg = "Differences: %s" % seqscape_diff_header
                        ss_vs_h_check_result.error_message = error_msg
                        ss_vs_h_check_result.result = RESULT.FAILURE
                    if header_diff_seqscape:
                        error_msg = "Differences: %s" % header_diff_seqscape
                        h_vs_ss_check_result.result = RESULT.FAILURE
                        h_vs_ss_check_result.error_message = error_msg

                if not irods_metadata.has_metadata():
                    # Only the two irods-related checks are blocked here.
                    error_msg = "No irods metadata"
                    i_vs_h_check_result.executed = False
                    h_vs_i_check_result.executed = False
                    i_vs_h_check_result.result = None
                    h_vs_i_check_result.result = None
                    i_vs_h_check_result.error_message.append(error_msg)
                    h_vs_i_check_result.error_message.append(error_msg)
                else:
                    irods_diff_header = irods_metadata.difference(
                        header_metadata)
                    header_diff_irods = header_metadata.difference(
                        irods_metadata)
                    if irods_diff_header:
                        error_msg = "Differences: %s" % irods_diff_header
                        i_vs_h_check_result.error_message = error_msg
                        i_vs_h_check_result.result = RESULT.FAILURE

                    if header_diff_irods:
                        error_msg = "Differences between what is in the header and not in iRODS: %s" % header_diff_irods
                        h_vs_i_check_result.error_message = error_msg
                        h_vs_i_check_result.result = RESULT.FAILURE

            issues_dict[fpath].append(ss_vs_h_check_result)
            issues_dict[fpath].append(h_vs_ss_check_result)
            issues_dict[fpath].append(i_vs_h_check_result)
            issues_dict[fpath].append(h_vs_i_check_result)