def check_read_permission_exists_for_ss_group(
        cls, acls) -> List[CheckResult]:
    """
    Look for an ss-group ACL and verify that it grants read permission.

    Only the first ACL for which ``provides_access_for_ss_group()`` is true
    is inspected; later ACLs are not looked at.

    :param acls: iterable of ACL objects
    :return: two check results, in this order: "ss group present" and
        "ss group has read permission". Both are set to FAILURE when no
        ss-group ACL is found.
    """
    read_check = CheckResult(
        check_name=CHECK_NAMES.check_ss_irods_group_read_permission,
        severity=SEVERITY.WARNING)
    group_check = CheckResult(
        check_name=CHECK_NAMES.check_there_is_ss_irods_group,
        severity=SEVERITY.WARNING)

    # Private sentinel so that "nothing matched" cannot be confused with
    # any value coming from ``acls``.
    nothing_found = object()
    ss_group_acl = next(
        (acl for acl in acls if acl.provides_access_for_ss_group()),
        nothing_found)

    if ss_group_acl is nothing_found:
        group_check.result = RESULT.FAILURE
        read_check.result = RESULT.FAILURE
    elif not ss_group_acl.provides_read_permission():
        read_check.result = RESULT.FAILURE
        read_check.error_message = "ACL found: " + str(ss_group_acl)
    return [group_check, read_check]
Beispiel #2
0
    def validate_fields(self):
        """
        Validate the checksum and the replica number of this object.

        A ``TypeError`` raised while validating the checksum is treated the
        same way as an invalid checksum.

        :return: list holding the checksum check result followed by the
            replica-number check result
        """
        checksum_check = CheckResult(
            check_name=CHECK_NAMES.check_replica_checksum_valid,
            severity=SEVERITY.IMPORTANT)
        try:
            checksum_ok = self._is_checksum_valid(self.checksum)
        except TypeError:
            checksum_ok = False
        if not checksum_ok:
            checksum_check.result = RESULT.FAILURE
            checksum_check.error_message = "The checksum looks invalid: " + str(
                self.checksum)

        replica_nr_check = CheckResult(
            check_name=CHECK_NAMES.check_replica_number,
            severity=SEVERITY.WARNING)
        replica_nr_ok = self._is_replica_nr_valid(self.replica_nr)
        if not replica_nr_ok:
            replica_nr_check.result = RESULT.FAILURE
            replica_nr_check.error_message = "The replica number looks invalid: " + str(
                self.replica_nr)

        return [checksum_check, replica_nr_check]
    def validate_fields(self) -> List:
        """
        Run the checksum, npg_qc and target checks on this object.

        The checksum comparison is executed only when both presence checks
        (checksum at upload, checksum in metadata) succeeded; otherwise it
        is flagged as not executed and its result is set to None.

        :return: list of check results in this order: checksum at upload
            present, checksum in metadata present, checksum comparison,
            npg_qc field, target field
        """
        upload_presence = self.check_checksum_at_upload_present()
        meta_presence = self.check_checksum_in_meta_present()

        comparison = CheckResult(
            check_name=CHECK_NAMES.
            check_by_comparison_checksum_in_meta_with_checksum_at_upload)
        both_present = (upload_presence.result == RESULT.SUCCESS
                        and meta_presence.result == RESULT.SUCCESS)
        if not both_present:
            comparison.executed = False
            comparison.result = None
        elif self.checksum_in_meta != self.checksum_at_upload:
            comparison.result = RESULT.FAILURE
            comparison.error_message = "The checksum in metadata = %s different than checksum at upload = %s" % (
                self.checksum_in_meta, self.checksum_at_upload)
        else:
            comparison.result = RESULT.SUCCESS

        return [
            upload_presence,
            meta_presence,
            comparison,
            self.check_npg_qc_field(),
            self.check_target_field(),
        ]
 def check_npg_qc_field(self):
     """
     Check that the npg_qc field is present and passes validation.

     :return: check result set to FAILURE, with an error message, when
         ``get_npg_qc()`` returns None or ``_is_npg_qc_valid`` rejects
         the value; otherwise the result is left as constructed
     """
     outcome = CheckResult(check_name=CHECK_NAMES.check_npg_qc_field)
     if self.get_npg_qc() is None:
         outcome.result = RESULT.FAILURE
         outcome.error_message = "Missing npg_qc field"
         return outcome
     if self._is_npg_qc_valid(self.get_npg_qc()):
         return outcome
     outcome.error_message = "This npg_qc field looks invalid: " + str(
         self.get_npg_qc())
     outcome.result = RESULT.FAILURE
     return outcome
 def check_checksum_in_meta_present(self):
     """
     Check that ``checksum_in_meta`` holds a truthy value.

     :return: check result set to SUCCESS when the checksum is present,
         FAILURE with an error message otherwise
     """
     presence_check = CheckResult(
         check_name=CHECK_NAMES.check_checksum_in_metadata_present,
         severity=SEVERITY.WARNING)
     if not self.checksum_in_meta:
         presence_check.result = RESULT.FAILURE
         presence_check.error_message = "Missing checksum from metadata"
         return presence_check
     presence_check.result = RESULT.SUCCESS
     return presence_check
 def check_target_field(self):
     """
     Check that the target field is present and passes validation.

     :return: check result set to FAILURE, with an error message, when
         ``get_target()`` returns None or ``_is_target_valid`` rejects
         the value; otherwise the result is left as constructed
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_target_field)
     if self.get_target() is None:
         outcome.result = RESULT.FAILURE
         outcome.error_message = "Missing target field"
         return outcome
     if self._is_target_valid(self.get_target()):
         return outcome
     outcome.error_message = "The target field looks invalid: " + str(
         self.get_target())
     outcome.result = RESULT.FAILURE
     return outcome
 def _check_for_invalid_ids(cls, multi_ids_dict: typing.Dict, entity_type: str):
     """
     Check that every id in ``multi_ids_dict`` passes ``cls._is_id_valid``.

     :param multi_ids_dict: mapping of id type -> iterable of id values;
         an empty mapping or None is reported as "No ids found."
     :param entity_type: entity label used in the error messages
     :return: check result whose ``error_message`` is a list of strings;
         the result is FAILURE when there are no ids or any id is invalid
     """
     check_result = CheckResult(check_name=CHECK_NAMES.check_valid_ids, error_message=[])
     if not multi_ids_dict:
         check_result.result = RESULT.FAILURE
         check_result.error_message.append("No ids found.")
         # Nothing to iterate; returning here also keeps a None argument
         # from reaching ``.items()`` below.
         return check_result
     for id_type, values in multi_ids_dict.items():
         wrong_ids = [value for value in values if not cls._is_id_valid(value)]
         if wrong_ids:
             check_result.error_message.append("Invalid " + str(id_type) + "(s) for " + str(entity_type) + ": " + str(wrong_ids))
             check_result.result = RESULT.FAILURE
     return check_result
Beispiel #8
0
    def check_studies_fetched_by_samples(self):
        """
        Check that the studies held for this object are all among the
        studies associated with its samples.

        The check is flagged as not executed (result None) when there are
        no sample entities.

        :return: list containing the single check result
        """
        same_study_for_samples_check = CheckResult(
            check_name=CHECK_NAMES.
            check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples)
        samples = self.get_entities_by_type('sample')
        if not samples:
            same_study_for_samples_check.executed = False
            same_study_for_samples_check.result = None
            return [same_study_for_samples_check]

        studies_by_samples_set = set(
            self.get_all_entities_by_association_by_type('sample', 'study'))
        studies_set = set(self.get_entities_by_type('study'))

        studies_set_names = [study.name for study in studies_set]
        studies_by_samples_set_names = [
            study.name for study in studies_by_samples_set
        ]
        sample_set_ids = [(sample.name, sample.accession_number)
                          for sample in samples]

        # The former ``else`` branch recomputed
        # ``studies_set.difference(studies_by_samples_set)``, which is always
        # empty once ``issubset`` holds, so it could never report anything
        # and has been removed.
        if not studies_set.issubset(studies_by_samples_set):
            error_msg = "For the %s given seqscape samples, the studies in iRODS: %s and the studies in Seqscape DISAGREE: %s" % (
                str(len(sample_set_ids)), studies_set_names,
                studies_by_samples_set_names)
            same_study_for_samples_check.result = RESULT.FAILURE
            same_study_for_samples_check.error_message = error_msg
        return [same_study_for_samples_check]
Beispiel #9
0
    def validate_fields(self):
        """
        Validate the zone and the permission of this ACL.

        :return: list holding the zone check result followed by the
            permission check result
        """
        zone_check = CheckResult(
            check_name=CHECK_NAMES.check_irods_zone_within_acl,
            severity=SEVERITY.WARNING)
        zone_ok = self._is_irods_zone_valid(self.zone)
        if not zone_ok:
            zone_check.result = RESULT.FAILURE
            zone_check.error_message = "".join([
                "The iRODS zone seems wrong: ", str(self.zone),
                " in acl = ", str(self)])

        permission_check = CheckResult(
            check_name=CHECK_NAMES.check_irods_permission_within_acl,
            severity=SEVERITY.WARNING)
        permission_ok = self._is_permission_valid(self.permission)
        if not permission_ok:
            permission_check.result = RESULT.FAILURE
            permission_check.error_message = "".join([
                "The iRODS permission seems wrong: ", str(self.permission),
                " in  acl = ", str(self)])

        return [zone_check, permission_check]
 def check_checksum_at_upload_present(self):
     """
     Check that a checksum obtained at upload (ichksum) is available.

     :return: check result set to FAILURE when ``checksum_at_upload`` is
         missing, or when it is a collection holding more than one
         distinct value; SUCCESS otherwise
     """
     check_result = CheckResult(
         check_name=CHECK_NAMES.check_checksum_at_upload_present)
     checksums = self.checksum_at_upload
     if not checksums:
         check_result.result = RESULT.FAILURE
         check_result.error_message = "Missing checksum from ichksum result"
         return check_result

     # ``type(x) is Iterable`` is never true for an actual value, so the
     # multi-value branch could not run. Use isinstance instead, and keep
     # a single checksum string (itself iterable) from being compared
     # character by character.
     is_collection = (isinstance(checksums, Iterable)
                      and not isinstance(checksums, (str, bytes)))
     if is_collection and len(set(checksums)) > 1:
         check_result.result = RESULT.FAILURE
         check_result.error_message = "Different checksum values at upload (ichksum)"
     else:
         check_result.result = RESULT.SUCCESS
     return check_result
 def _check_for_invalid_ids(cls, multi_ids_dict: typing.Dict,
                            entity_type: str):
     """
     Check that every id in ``multi_ids_dict`` passes ``cls._is_id_valid``.

     :param multi_ids_dict: mapping of id type -> iterable of id values;
         an empty mapping or None is reported as "No ids found."
     :param entity_type: entity label used in the error messages
     :return: check result whose ``error_message`` is a list of strings;
         the result is FAILURE when there are no ids or any id is invalid
     """
     check_result = CheckResult(check_name=CHECK_NAMES.check_valid_ids,
                                error_message=[])
     if not multi_ids_dict:
         check_result.result = RESULT.FAILURE
         check_result.error_message.append("No ids found.")
         # Nothing to iterate; returning here also keeps a None argument
         # from reaching ``.items()`` below.
         return check_result
     for id_type, values in multi_ids_dict.items():
         wrong_ids = [
             value for value in values if not cls._is_id_valid(value)
         ]
         if wrong_ids:
             check_result.error_message.append("Invalid " + str(id_type) +
                                               "(s) for " +
                                               str(entity_type) + ": " +
                                               str(wrong_ids))
             check_result.result = RESULT.FAILURE
     return check_result
Beispiel #12
0
    def _check_by_comparison_entities_fetched_by_different_id_types(
            cls, query_results: List[SeqscapeEntityQueryAndResults]) -> List:
        """
        Compare the entities fetched by each pair of consecutive queries.

        A pair is accepted when one entity set is a subset of the other.
        Otherwise the check fails and the message lists the extra entities
        on each side. The message is overwritten for every failing pair,
        so only the last failing pair is reported.

        :param query_results: query results to compare pairwise, in order
        :return: a single check result (not a list, despite the annotation)
        :raises ValueError: if the sets are not subsets of each other yet
            neither difference contains anything
        """
        outcome = CheckResult(
            check_name=CHECK_NAMES.
            check_entities_in_seqscape_fetched_by_different_ids)
        for previous, current in zip(query_results, query_results[1:]):
            previous_set = set(previous.entities_fetched)
            current_set = set(current.entities_fetched)
            if previous_set.issubset(current_set) or current_set.issubset(
                    previous_set):
                continue

            previous_id_type = previous.query_id_type
            current_id_type = current.query_id_type
            only_in_previous = previous_set.difference(current_set)
            only_in_current = current_set.difference(previous_set)

            message = ""
            if only_in_previous:
                message = "Extra %s found when querying by %s compared to %s: %s." % (
                    previous.query_entity_type, previous_id_type,
                    current_id_type, only_in_previous)
            if only_in_current:
                message += "Extra %s found when querying by %s compared to %s: %s." % (
                    current.query_entity_type, current_id_type,
                    previous_id_type, only_in_current)
            if not only_in_current and not only_in_previous:
                raise ValueError(
                    "Somehow the entity sets are different, but I can't detect any difference."
                )

            outcome.error_message = message
            outcome.result = RESULT.FAILURE
        return outcome
 def check_all_ids_were_found(self) -> List:
     """
     Check that ``_find_missing_ids()`` reports no missing ids.

     :return: a single check result (not a list, despite the annotation),
         set to FAILURE with an error message when ids are missing
     """
     missing = self._find_missing_ids()
     outcome = CheckResult(check_name=CHECK_NAMES.check_all_irods_ids_found_in_seqscape)
     if not missing:
         return outcome
     outcome.error_message = "The following ids weren't found in SequencescapeDB: %s " % missing
     outcome.result = RESULT.FAILURE
     return outcome
    def _check_by_comparison_entities_fetched_by_different_id_types(cls, query_results: List[
        SeqscapeEntityQueryAndResults]) -> List:
        """
        Compare the entities fetched by each pair of consecutive queries.

        A pair is accepted when one entity set is a subset of the other.
        Otherwise the check fails and the message lists the extra entities
        on each side. The message is overwritten for every failing pair,
        so only the last failing pair is reported.

        :param query_results: query results to compare pairwise, in order
        :return: a single check result (not a list, despite the annotation)
        :raises ValueError: if the sets are not subsets of each other yet
            neither difference contains anything
        """
        outcome = CheckResult(check_name=CHECK_NAMES.check_entities_in_seqscape_fetched_by_different_ids)
        for left, right in zip(query_results, query_results[1:]):
            left_entities = set(left.entities_fetched)
            right_entities = set(right.entities_fetched)
            if left_entities.issubset(right_entities) or right_entities.issubset(left_entities):
                continue

            left_id_type = left.query_id_type
            right_id_type = right.query_id_type
            extra_left = left_entities.difference(right_entities)
            extra_right = right_entities.difference(left_entities)

            message = ""
            if extra_left:
                message = "Extra %s found when querying by %s compared to %s: %s." % (
                    left.query_entity_type, left_id_type, right_id_type, extra_left)
            if extra_right:
                message += "Extra %s found when querying by %s compared to %s: %s." % (
                    right.query_entity_type, right_id_type, left_id_type, extra_right)
            if not extra_right and not extra_left:
                raise ValueError("Somehow the entity sets are different, but I can't detect any difference.")

            outcome.error_message = message
            outcome.result = RESULT.FAILURE
        return outcome
Beispiel #15
0
 def check_all_ids_were_found(self) -> List:
     """
     Check that ``_find_missing_ids()`` reports no missing ids.

     :return: a single check result (not a list, despite the annotation),
         set to FAILURE with an error message when ids are missing
     """
     not_found = self._find_missing_ids()
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_all_irods_ids_found_in_seqscape)
     if not not_found:
         return outcome
     outcome.error_message = "The following ids weren't found in SequencescapeDB: %s " % not_found
     outcome.result = RESULT.FAILURE
     return outcome
 def check_no_duplicates_found(self) -> List:
     """
     Check that ``_find_duplicated_ids()`` reports no duplicated ids.

     On failure the message lists the duplicated ids together with the
     fetched entities whose ``query_id_type`` attribute is one of them.

     :return: a single check result (not a list, despite the annotation)
     """
     outcome = CheckResult(check_name=CHECK_NAMES.check_for_duplicated_ids_within_seqscape)
     duplicated_ids = self._find_duplicated_ids()
     if not duplicated_ids:
         return outcome
     duplicated_entities = []
     for entity in self.entities_fetched:
         if getattr(entity, self.query_id_type) in duplicated_ids:
             duplicated_entities.append(entity)
     outcome.error_message = "The following ids: %s are duplicated - entities: %s" % (duplicated_ids, duplicated_entities)
     outcome.result = RESULT.FAILURE
     return outcome
 def check_attributes_have_the_right_frequency(cls, standard_attr_dict,
                                               actual_attr_dict):
     """
     Compare actual attribute frequencies against the expected ones.

     :param standard_attr_dict: mapping of attribute -> expected count
     :param actual_attr_dict: mapping of attribute -> actual count
     :return: check result that starts as SUCCESS and becomes FAILURE when
         an expected attribute is missing or has a different count;
         ``error_message`` is a list with one entry per problem
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_attribute_count,
         executed=True,
         result=RESULT.SUCCESS,
         error_message=[])
     for attr, expected in standard_attr_dict.items():
         if attr not in actual_attr_dict:
             outcome.result = RESULT.FAILURE
             outcome.error_message.append('Missing attribute %s' % attr)
             continue
         if expected == actual_attr_dict[attr]:
             continue
         outcome.result = RESULT.FAILURE
         outcome.error_message.append(
             "Attribute %s should appear %s times and instead appears %s times"
             % (attr, expected, actual_attr_dict[attr]))
     return outcome
 def check_more_than_one_replicas(cls, replicas) -> CheckResult:
     """
     Check that there is more than one replica.

     :param replicas: sized collection of replicas
     :return: check result set to FAILURE (and flagged as executed) when
         there are zero or one replicas; left as constructed otherwise
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_more_than_one_replica,
         severity=SEVERITY.WARNING)
     if not len(replicas) <= 1:
         return outcome
     outcome.executed = True
     outcome.result = RESULT.FAILURE
     replica_count = str(len(replicas))
     outcome.error_message = "File has " + replica_count + " replicas"
     return outcome
 def check_samples_fetched_by_studies(self):
     """
     Check that every sample entity is among the samples associated with
     the study entities.

     The check is flagged as not executed (result None) when there are no
     study entities.

     :return: check result; on FAILURE the message lists the study names
         and the (name, accession number) of each offending sample
     """
     outcome = CheckResult(check_name=CHECK_NAMES.check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape)
     if not self.get_entities_by_type('study'):
         outcome.executed = False
         outcome.result = None
         return outcome

     samples_via_studies = set(self.get_all_entities_by_association_by_type('study', 'sample'))
     own_samples = set(self.get_entities_by_type('sample'))
     if own_samples.issubset(samples_via_studies):
         return outcome

     misplaced_samples = own_samples.difference(samples_via_studies)
     template = ("Some samples don't appear under study(s): %s in Sequencescape, "
                 "but they appear under this study in iRODS. Number of samples: %s, "
                 "and ids: %s")
     study_names = [study.name for study in self.get_entities_by_type('study')]
     sample_ids = [(s.name, s.accession_number) for s in misplaced_samples]
     outcome.error_message = template % (study_names,
                                         str(len(misplaced_samples)),
                                         sample_ids)
     outcome.result = RESULT.FAILURE
     return outcome
    def check_studies_fetched_by_samples(self):
        """
        Check that the studies held for this object are all among the
        studies associated with its samples.

        The check is flagged as not executed (result None) when there are
        no sample entities.

        :return: list containing the single check result
        """
        same_study_for_samples_check = CheckResult(check_name=CHECK_NAMES.check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples)
        samples = self.get_entities_by_type('sample')
        if not samples:
            same_study_for_samples_check.executed = False
            same_study_for_samples_check.result = None
            return [same_study_for_samples_check]

        studies_by_samples_set = set(self.get_all_entities_by_association_by_type('sample', 'study'))
        studies_set = set(self.get_entities_by_type('study'))

        studies_set_names = [study.name for study in studies_set]
        studies_by_samples_set_names = [study.name for study in studies_by_samples_set]
        sample_set_ids = [(sample.name, sample.accession_number) for sample in samples]

        # The former ``else`` branch recomputed
        # ``studies_set.difference(studies_by_samples_set)``, which is always
        # empty once ``issubset`` holds, so it could never report anything
        # and has been removed.
        if not studies_set.issubset(studies_by_samples_set):
            error_msg = "For the %s given seqscape samples, the studies in iRODS: %s and the studies in Seqscape DISAGREE: %s" % (str(len(sample_set_ids)), studies_set_names, studies_by_samples_set_names)
            same_study_for_samples_check.result = RESULT.FAILURE
            same_study_for_samples_check.error_message = error_msg
        return [same_study_for_samples_check]
Beispiel #21
0
    def validate_fields(self):
        """
        Validate the zone and the permission of this ACL.

        :return: list holding the zone check result followed by the
            permission check result
        """
        zone_outcome = CheckResult(
            check_name=CHECK_NAMES.check_irods_zone_within_acl,
            severity=SEVERITY.WARNING)
        if not self._is_irods_zone_valid(self.zone):
            zone_outcome.result = RESULT.FAILURE
            zone_text = str(self.zone)
            zone_outcome.error_message = ("The iRODS zone seems wrong: " +
                                          zone_text + " in acl = " +
                                          str(self))

        permission_outcome = CheckResult(
            check_name=CHECK_NAMES.check_irods_permission_within_acl,
            severity=SEVERITY.WARNING)
        if not self._is_permission_valid(self.permission):
            permission_outcome.result = RESULT.FAILURE
            permission_text = str(self.permission)
            permission_outcome.error_message = (
                "The iRODS permission seems wrong: " + permission_text +
                " in  acl = " + str(self))

        return [zone_outcome, permission_outcome]
 def check_reference(self, desired_ref_name: str) -> List[CheckResult]:
     """
     Check that each reference contains ``desired_ref_name``
     (case-insensitive substring match).

     When there are no references or no desired name, the check is flagged
     as not executed, its result is set to None and ``error_message`` is a
     list of the reasons. On a mismatch ``error_message`` is replaced by a
     single string; with several mismatches only the last one is kept.

     :param desired_ref_name: name expected to occur in every reference
     :return: a single check result (not a list, despite the annotation)
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_desired_reference)
     outcome.error_message = []

     preconditions = (
         (self.get_references(), "Missing reference from the metadata"),
         (desired_ref_name, "Missing desired reference parameter"),
     )
     for value, complaint in preconditions:
         if not value:
             outcome.result = None
             outcome.executed = False
             outcome.error_message.append(complaint)
     if outcome.error_message:
         return outcome

     wanted = desired_ref_name.lower()
     for ref in self.get_references():
         if wanted not in ref.lower():
             outcome.result = RESULT.FAILURE
             outcome.error_message = "The desired reference is: %s is different thant the metadata reference: %s" % (
                 desired_ref_name, ref)
     return outcome
 def _check_entities_have_all_types_of_ids(entity_list, mandatory_id_types, entity_type):
     """
     Check that every entity has a truthy value for each mandatory id type.

     The error message is overwritten for each failing entity, so only the
     last failing entity is reported.

     :param entity_list: entities to inspect
     :param mandatory_id_types: attribute names each entity must have set
     :param entity_type: entity label used in the error message
     :return: check result set to FAILURE when any entity lacks an id type
     """
     outcome = CheckResult(check_name=CHECK_NAMES.check_all_id_types_present)
     for entity in entity_list:
         absent_types = [id_type for id_type in mandatory_id_types if not getattr(entity, id_type)]
         if not absent_types:
             continue
         present_types = tuple(set(mandatory_id_types).difference(set(absent_types)))
         present_values = [getattr(entity, name) for name in present_types]
         outcome.error_message = 'Missing %s %s from %s: %s' % (entity_type, absent_types, entity_type, present_values)
         outcome.result = RESULT.FAILURE
     return outcome
Beispiel #24
0
 def check_no_duplicates_found(self) -> List:
     """
     Check that ``_find_duplicated_ids()`` reports no duplicated ids.

     On failure the message lists the duplicated ids together with the
     fetched entities whose ``query_id_type`` attribute is one of them.

     :return: a single check result (not a list, despite the annotation)
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_for_duplicated_ids_within_seqscape)
     repeated_ids = self._find_duplicated_ids()
     if not repeated_ids:
         return outcome
     repeated_entities = []
     for candidate in self.entities_fetched:
         if getattr(candidate, self.query_id_type) in repeated_ids:
             repeated_entities.append(candidate)
     outcome.error_message = "The following ids: %s are duplicated - entities: %s" % (
         repeated_ids, repeated_entities)
     outcome.result = RESULT.FAILURE
     return outcome
 def check_non_public_acls(cls, acls) -> List[CheckResult]:
     """
     Check that none of the ACLs provides public access.

     Only the first ACL for which ``provides_public_access()`` is true is
     reported. With no ACLs the check is flagged as not executed and its
     result is set to None.

     :param acls: collection of ACL objects
     :return: a single check result (not a list, despite the annotation)
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_no_public_acl,
         severity=SEVERITY.WARNING)
     if not acls:
         outcome.result = None
         outcome.executed = False
         outcome.error_message = "There are no ACLs."
         return outcome

     # Private sentinel so that "nothing matched" cannot be confused with
     # any value coming from ``acls``.
     nothing_found = object()
     public_acl = next(
         (acl for acl in acls if acl.provides_public_access()),
         nothing_found)
     if public_acl is not nothing_found:
         outcome.error_message = "The following ACL was found: " + str(
             public_acl)
         outcome.result = RESULT.FAILURE
     return outcome
 def check_all_replicas_have_same_checksum(cls,
                                           replicas) -> CheckResult:
     """
     Check that every replica has the same checksum as the first one.

     With no replicas the check is flagged as not executed and its result
     is set to None.

     :param replicas: indexable collection of replicas, each with a
         ``checksum`` attribute
     :return: check result; on FAILURE the message concatenates one
         sentence per replica whose checksum differs from the first
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_all_replicas_same_checksum,
         severity=SEVERITY.IMPORTANT)
     if not replicas:
         outcome.executed = False
         outcome.error_message = ["No replicas to compare with."]
         outcome.result = None
         return outcome

     reference = replicas[0]
     complaints = []
     for replica in replicas:
         if replica.checksum == reference.checksum:
             continue
         outcome.result = RESULT.FAILURE
         complaints.append("Replica: " + str(replica) +
                           " has different checksum than replica: " +
                           str(reference))
     if complaints:
         outcome.error_message = ''.join(complaints)
     return outcome
Beispiel #27
0
    def validate_fields(self):
        """
        Validate the checksum and the replica number of this object.

        A ``TypeError`` raised while validating the checksum is treated the
        same way as an invalid checksum.

        :return: list holding the checksum check result followed by the
            replica-number check result
        """
        checksum_outcome = CheckResult(check_name=CHECK_NAMES.check_replica_checksum_valid,
                                       severity=SEVERITY.IMPORTANT)
        try:
            checksum_valid = self._is_checksum_valid(self.checksum)
        except TypeError:
            checksum_valid = False
        if not checksum_valid:
            checksum_outcome.result = RESULT.FAILURE
            checksum_outcome.error_message = "The checksum looks invalid: " + str(self.checksum)

        replica_outcome = CheckResult(check_name=CHECK_NAMES.check_replica_number,
                                      severity=SEVERITY.WARNING)
        replica_nr_valid = self._is_replica_nr_valid(self.replica_nr)
        if not replica_nr_valid:
            replica_outcome.result = RESULT.FAILURE
            replica_outcome.error_message = "The replica number looks invalid: " + str(self.replica_nr)

        return [checksum_outcome, replica_outcome]
Beispiel #28
0
 def check_samples_fetched_by_studies(self):
     """
     Check that every sample entity is among the samples associated with
     the study entities.

     The check is flagged as not executed (result None) when there are no
     study entities.

     :return: check result; on FAILURE the message lists the study names
         and the (name, accession number) of each offending sample
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.
         check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape
     )
     if not self.get_entities_by_type('study'):
         outcome.executed = False
         outcome.result = None
         return outcome

     samples_via_studies = set(
         self.get_all_entities_by_association_by_type('study', 'sample'))
     own_samples = set(self.get_entities_by_type('sample'))
     if own_samples.issubset(samples_via_studies):
         return outcome

     wrong_study_samples = own_samples.difference(samples_via_studies)
     template = ("Some samples don't appear under study(s): %s in Sequencescape, "
                 "but they appear under this study in iRODS. Number of samples: %s, "
                 "and ids: %s")
     study_names = [
         study.name for study in self.get_entities_by_type('study')
     ]
     sample_ids = [(s.name, s.accession_number)
                   for s in wrong_study_samples]
     outcome.error_message = template % (
         study_names, str(len(wrong_study_samples)), sample_ids)
     outcome.result = RESULT.FAILURE
     return outcome
Beispiel #29
0
 def _check_entities_have_all_types_of_ids(entity_list, mandatory_id_types,
                                           entity_type):
     """
     Check that every entity has a truthy value for each mandatory id type.

     The error message is overwritten for each failing entity, so only the
     last failing entity is reported.

     :param entity_list: entities to inspect
     :param mandatory_id_types: attribute names each entity must have set
     :param entity_type: entity label used in the error message
     :return: check result set to FAILURE when any entity lacks an id type
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_all_id_types_present)
     for entity in entity_list:
         absent_types = [
             id_type for id_type in mandatory_id_types
             if not getattr(entity, id_type)
         ]
         if not absent_types:
             continue
         present_types = tuple(
             set(mandatory_id_types).difference(set(absent_types)))
         present_values = [getattr(entity, name) for name in present_types]
         outcome.error_message = 'Missing %s %s from %s: %s' % (
             entity_type, absent_types, entity_type, present_values)
         outcome.result = RESULT.FAILURE
     return outcome
 def checksum_comparison_check(self):
     """
     Compare the checksum in the metadata with the checksum at upload.

     When either checksum is missing the check is flagged as not executed
     and ``error_message`` is a list naming what is missing. When both are
     present and differ, the result is FAILURE and ``error_message`` is
     replaced by a single string.

     :return: the check result
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.
         check_by_comparison_checksum_in_meta_with_checksum_at_upload,
         error_message=[])

     absent_notes = []
     if not self.checksum_at_upload:
         absent_notes.append("Missing ichecksum result.")
     if not self.checksum_in_meta:
         absent_notes.append("Missing checksum from metadata")
     if absent_notes:
         outcome.executed = False
         outcome.error_message.extend(absent_notes)
         return outcome

     if self.checksum_in_meta != self.checksum_at_upload:
         outcome.result = RESULT.FAILURE
         outcome.error_message = "The checksum in metadata = %s different than checksum at upload = %s" % (
             self.checksum_in_meta, self.checksum_at_upload)
     return outcome
 def check_attribute_count(
         self, avu_counts: List[AttributeCount]) -> List[CheckResult]:
     """
     Check how often each attribute occurs against its expected count.

     For each entry, the actual count from
     ``get_values_count_for_attribute`` is compared with the entry's
     ``count`` using its ``operator`` via ``_is_true_comparison``.

     :param avu_counts: expectations, each with ``attribute``, ``count``
         and ``operator``
     :return: a single check result (not a list, despite the annotation);
         on FAILURE the message is a comma-joined list of violations
     """
     outcome = CheckResult(
         check_name=CHECK_NAMES.check_attribute_count,
         severity=SEVERITY.IMPORTANT)
     violations = []
     for expectation in avu_counts:
         found = self.get_values_count_for_attribute(
             expectation.attribute)
         limit = expectation.count
         if self._is_true_comparison(found, limit, expectation.operator):
             continue
         violations.append(
             "attribute %s should appear %s %s times and appears %s" %
             (expectation.attribute, expectation.operator, limit, found))
     if not violations:
         return outcome
     outcome.result = RESULT.FAILURE
     outcome.error_message = ','.join(violations)
     return outcome
Beispiel #32
0
    def check_metadata_across_different_sources(irods_metadata_dict,
                                                header_metadata_dict,
                                                seqsc_metadata_dict,
                                                issues_dict):
        """
        This function checks the metadata from 3 different sources (iRODS, file header, Sequencescape)
        in terms of samples, libraries and studies.
        For every path in irods_metadata_dict four CheckResults are appended to issues_dict[fpath],
        in this order: seqscape vs header, header vs seqscape, irods vs header, header vs irods.
        A comparison whose input is unavailable is marked as not executed (result None) and the reason
        is appended to its error_message list. A comparison that finds differences gets RESULT.FAILURE
        and a string error_message describing them.
        A path that is missing from header_metadata_dict or seqsc_metadata_dict is handled like a source
        that has no metadata, and a path that is missing from issues_dict gets a new list.
        :param irods_metadata_dict: key: fpath, value: irods_metadata for that file
        :param header_metadata_dict: key: fpath, value: header_metadata for that file
        :param seqsc_metadata_dict: key: fpath, value: seqscape_metadata for that file
        :param issues_dict: key: fpath, value: list of CheckResults (updated in place)
        :return: None
        """
        def _lacks_metadata(metadata):
            # dict.get() yields None for an unknown path; treat that exactly
            # like a metadata object reporting that it holds nothing.
            return metadata is None or not metadata.has_metadata()

        def _mark_not_executed(check_results, reason):
            # Flag every given check as skipped and record why.
            for check_result in check_results:
                check_result.executed = False
                check_result.result = None
                check_result.error_message.append(reason)

        def _fail_on_difference(check_result, difference, message_template):
            # A non-empty difference turns the check into a failure.
            if difference:
                check_result.result = RESULT.FAILURE
                check_result.error_message = message_template % difference

        for fpath, irods_metadata in irods_metadata_dict.items():
            header_metadata = header_metadata_dict.get(fpath)
            seqscape_metadata = seqsc_metadata_dict.get(fpath)

            ss_vs_h_check_result = CheckResult(
                check_name=CHECK_NAMES.
                check_seqscape_ids_compared_to_header_ids,
                error_message=[])
            h_vs_ss_check_result = CheckResult(
                check_name=CHECK_NAMES.
                check_header_ids_compared_to_seqscape_ids,
                error_message=[])
            i_vs_h_check_result = CheckResult(
                check_name=CHECK_NAMES.check_irods_ids_compared_to_header_ids,
                error_message=[])
            h_vs_i_check_result = CheckResult(
                check_name=CHECK_NAMES.check_header_ids_compared_to_irods_ids,
                error_message=[])

            seqscape_checks = (ss_vs_h_check_result, h_vs_ss_check_result)
            irods_checks = (i_vs_h_check_result, h_vs_i_check_result)

            if _lacks_metadata(header_metadata):
                # Every comparison involves the header, so none can run.
                _mark_not_executed(seqscape_checks + irods_checks,
                                   "No header metadata")
            else:
                if _lacks_metadata(seqscape_metadata):
                    _mark_not_executed(seqscape_checks,
                                       "No seqscape metadata")
                else:
                    seqscape_diff_header = seqscape_metadata.difference(
                        header_metadata)
                    header_diff_seqscape = header_metadata.difference(
                        seqscape_metadata)
                    _fail_on_difference(ss_vs_h_check_result,
                                        seqscape_diff_header,
                                        "Differences: %s")
                    _fail_on_difference(h_vs_ss_check_result,
                                        header_diff_seqscape,
                                        "Differences: %s")

                if _lacks_metadata(irods_metadata):
                    _mark_not_executed(irods_checks, "No irods metadata")
                else:
                    irods_diff_header = irods_metadata.difference(
                        header_metadata)
                    header_diff_irods = header_metadata.difference(
                        irods_metadata)
                    _fail_on_difference(i_vs_h_check_result,
                                        irods_diff_header,
                                        "Differences: %s")
                    _fail_on_difference(
                        h_vs_i_check_result, header_diff_irods,
                        "Differences between what is in the header and not in iRODS: %s")

            # setdefault keeps working for paths that have no issues recorded
            # yet, whether issues_dict is a plain dict or a defaultdict.
            issues_dict.setdefault(fpath, []).extend(
                seqscape_checks + irods_checks)