def _check_by_comparison_entities_fetched_by_different_id_types(
        cls, query_results: List[SeqscapeEntityQueryAndResults]) -> List:
    """
    Compare the entities fetched by neighbouring queries (each one made by a different id type).

    A pair of result sets fails the check only when each of them holds entities the other one lacks;
    a pair where one set is contained in the other is not reported. The messages of all failing pairs
    are kept, separated by a space.

    :param query_results: the query results, compared pairwise in the given order
    :return: a single CheckResult (the List annotation is kept for backward compatibility)
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_entities_in_seqscape_fetched_by_different_ids)
    error_messages = []
    for previous, current in zip(query_results, query_results[1:]):
        previous_entities = set(previous.entities_fetched)
        current_entities = set(current.entities_fetched)
        only_in_previous = previous_entities - current_entities
        only_in_current = current_entities - previous_entities
        # Neither set contains the other exactly when both differences are non-empty
        if not (only_in_previous and only_in_current):
            continue
        error_messages.append(
            "Extra %s found when querying by %s compared to %s: %s." % (
                previous.query_entity_type, previous.query_id_type, current.query_id_type, only_in_previous)
            + "Extra %s found when querying by %s compared to %s: %s." % (
                current.query_entity_type, current.query_id_type, previous.query_id_type, only_in_current))
    if error_messages:
        check_result.error_message = " ".join(error_messages)
        check_result.result = RESULT.FAILURE
    return check_result
def validate_fields(self):
    """
    Validate the checksum and the replica number of this object.

    :return: list of two CheckResults - the checksum check first, the replica number check second
    """
    checksum_check = CheckResult(
        check_name=CHECK_NAMES.check_replica_checksum_valid, severity=SEVERITY.IMPORTANT)
    try:
        checksum_ok = self._is_checksum_valid(self.checksum)
    except TypeError:
        # A TypeError raised by the validator is reported exactly like an invalid checksum
        checksum_ok = False
    if not checksum_ok:
        checksum_check.result = RESULT.FAILURE
        checksum_check.error_message = "The checksum looks invalid: " + str(self.checksum)

    replica_nr_check = CheckResult(
        check_name=CHECK_NAMES.check_replica_number, severity=SEVERITY.WARNING)
    if not self._is_replica_nr_valid(self.replica_nr):
        replica_nr_check.result = RESULT.FAILURE
        replica_nr_check.error_message = "The replica number looks invalid: " + str(self.replica_nr)

    return [checksum_check, replica_nr_check]
def check_all_ids_were_found(self) -> List:
    """
    Fail the check when self._find_missing_ids() reports any ids.

    :return: a single CheckResult (even though the annotation says List)
    """
    missing = self._find_missing_ids()
    check_result = CheckResult(check_name=CHECK_NAMES.check_all_irods_ids_found_in_seqscape)
    if not missing:
        return check_result
    check_result.error_message = "The following ids weren't found in SequencescapeDB: %s " % missing
    check_result.result = RESULT.FAILURE
    return check_result
def _check_by_comparison_entities_fetched_by_different_id_types(cls, query_results: List[
        SeqscapeEntityQueryAndResults]) -> List:
    """
    Compare the entities fetched by neighbouring queries (each one made by a different id type).

    A pair of result sets fails the check only when each of them holds entities the other one lacks;
    a pair where one set is contained in the other is not reported. The messages of all failing pairs
    are kept, separated by a space.

    :param query_results: the query results, compared pairwise in the given order
    :return: a single CheckResult (the List annotation is kept for backward compatibility)
    """
    check_result = CheckResult(check_name=CHECK_NAMES.check_entities_in_seqscape_fetched_by_different_ids)
    error_messages = []
    for previous, current in zip(query_results, query_results[1:]):
        previous_entities = set(previous.entities_fetched)
        current_entities = set(current.entities_fetched)
        only_in_previous = previous_entities - current_entities
        only_in_current = current_entities - previous_entities
        # Neither set contains the other exactly when both differences are non-empty
        if not (only_in_previous and only_in_current):
            continue
        error_messages.append(
            "Extra %s found when querying by %s compared to %s: %s." % (
                previous.query_entity_type, previous.query_id_type, current.query_id_type, only_in_previous)
            + "Extra %s found when querying by %s compared to %s: %s." % (
                current.query_entity_type, current.query_id_type, previous.query_id_type, only_in_current))
    if error_messages:
        check_result.error_message = " ".join(error_messages)
        check_result.result = RESULT.FAILURE
    return check_result
def check_no_duplicates_found(self) -> List:
    """
    Fail the check when self._find_duplicated_ids() reports any ids, listing the fetched entities
    whose query id is among them.

    :return: a single CheckResult (even though the annotation says List)
    """
    check_result = CheckResult(check_name=CHECK_NAMES.check_for_duplicated_ids_within_seqscape)
    duplicated_ids = self._find_duplicated_ids()
    if not duplicated_ids:
        return check_result

    duplicated_entities = []
    for entity in self.entities_fetched:
        if getattr(entity, self.query_id_type) in duplicated_ids:
            duplicated_entities.append(entity)
    check_result.error_message = "The following ids: %s are duplicated - entities: %s" % (
        duplicated_ids, duplicated_entities)
    check_result.result = RESULT.FAILURE
    return check_result
def check_all_ids_were_found(self) -> List:
    """
    Fail the check when self._find_missing_ids() reports any ids.

    :return: a single CheckResult (even though the annotation says List)
    """
    not_found = self._find_missing_ids()
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_all_irods_ids_found_in_seqscape)
    if not_found:
        message = "The following ids weren't found in SequencescapeDB: %s " % not_found
        check_result.error_message = message
        check_result.result = RESULT.FAILURE
    return check_result
def test_decide_exit_status_when_some_failed(self):
    # One default result plus one FAILURE of the same check is expected to give exit status 1
    default_result = CheckResult(CHECK_NAMES.check_all_id_types_present)
    failed_result = CheckResult(CHECK_NAMES.check_all_id_types_present, result=RESULT.FAILURE)
    exit_status = decide_exit_status({'path': [default_result, failed_result]})
    self.assertEqual(exit_status, 1)
def test_decide_exit_status_when_failed_irrelevant_tests(self):
    # A FAILURE of check_for_samples_in_more_studies is expected to leave the exit status at 0
    default_result = CheckResult(CHECK_NAMES.check_all_id_types_present)
    failed_result = CheckResult(CHECK_NAMES.check_for_samples_in_more_studies, result=RESULT.FAILURE)
    exit_status = decide_exit_status({'path': [default_result, failed_result]})
    self.assertEqual(exit_status, 0)
def check_more_than_one_replicas(cls, replicas) -> CheckResult:
    """
    Fail the check (severity WARNING) when there are fewer than two replicas.

    :param replicas: the replicas to count; only its length is used
    :return: the CheckResult of this check
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_more_than_one_replica, severity=SEVERITY.WARNING)
    replica_count = len(replicas)
    if replica_count > 1:
        return check_result
    check_result.executed = True
    check_result.result = RESULT.FAILURE
    check_result.error_message = "File has %s replicas" % replica_count
    return check_result
def check_checksum_in_meta_present(self):
    """
    Check (severity WARNING) that self.checksum_in_meta holds a truthy value.

    :return: a CheckResult with result SUCCESS or FAILURE
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_checksum_in_metadata_present, severity=SEVERITY.WARNING)
    if not self.checksum_in_meta:
        check_result.result = RESULT.FAILURE
        check_result.error_message = "Missing checksum from metadata"
        return check_result
    check_result.result = RESULT.SUCCESS
    return check_result
def _check_for_invalid_ids(cls, multi_ids_dict: typing.Dict, entity_type: str):
    """
    Fail the check when no ids are given or when cls._is_id_valid rejects any of them.

    :param multi_ids_dict: dict - key = id type, value = iterable of ids of that type
    :param entity_type: entity type name, used only in the error messages
    :return: a CheckResult whose error_message is a list with one entry per problem found
    """
    check_result = CheckResult(check_name=CHECK_NAMES.check_valid_ids, error_message=[])
    if not multi_ids_dict:
        check_result.result = RESULT.FAILURE
        check_result.error_message.append("No ids found.")
    for id_type, id_values in multi_ids_dict.items():
        rejected = [candidate for candidate in id_values if not cls._is_id_valid(candidate)]
        if not rejected:
            continue
        check_result.error_message.append(
            "Invalid %s(s) for %s: %s" % (str(id_type), str(entity_type), str(rejected)))
        check_result.result = RESULT.FAILURE
    return check_result
def test_decide_exit_status_when_all_pass(self):
    # None of the results is given an explicit FAILURE, so the exit status is expected to be 0
    check_names = (
        CHECK_NAMES.check_all_id_types_present,
        CHECK_NAMES.check_for_samples_in_more_studies,
        CHECK_NAMES.check_all_replicas_same_checksum,
        CHECK_NAMES.check_entities_in_seqscape_fetched_by_different_ids,
    )
    results = {'/path': [CheckResult(name) for name in check_names]}
    self.assertEqual(decide_exit_status(results), 0)
def test_group_by_executed_when_all_executed(self):
    # Both results are executed, so they are expected to be grouped together under the key True
    check_res1 = CheckResult(check_name="Some check", executed=True, result=RESULT.SUCCESS,
                             severity=SEVERITY.IMPORTANT)
    check_res2 = CheckResult(check_name="Some check", executed=True, result=RESULT.SUCCESS,
                             severity=SEVERITY.IMPORTANT)
    check_results = [check_res1, check_res2]
    res = CheckResultsProcessing.group_by_executed(check_results)
    expected = {True: check_results}
    # Compare the whole mapping: comparing set(res) with set(expected) would only look at the keys
    self.assertDictEqual(res, expected)
def _check_entities_have_all_types_of_ids(entity_list, mandatory_id_types, entity_type):
    """
    Fail the check when any entity has a falsy value for one of the mandatory id types.

    Every entity with missing ids is reported; the messages are joined with "; ". The ids that are
    present are listed in the order of mandatory_id_types so that the message is deterministic.

    :param entity_list: the entities to inspect
    :param mandatory_id_types: names of the attributes every entity must have a truthy value for
    :param entity_type: entity type name, used only in the error message
    :return: the CheckResult of this check
    """
    check_result = CheckResult(check_name=CHECK_NAMES.check_all_id_types_present)
    error_messages = []
    for entity in entity_list:
        missing_id_types = []
        present_id_vals = []
        for id_type in mandatory_id_types:
            id_value = getattr(entity, id_type)
            if id_value:
                present_id_vals.append(id_value)
            else:
                missing_id_types.append(id_type)
        if missing_id_types:
            error_messages.append('Missing %s %s from %s: %s' % (
                entity_type, missing_id_types, entity_type, present_id_vals))
    if error_messages:
        check_result.error_message = "; ".join(error_messages)
        check_result.result = RESULT.FAILURE
    return check_result
def check_no_duplicates_found(self) -> List:
    """
    Fail the check when self._find_duplicated_ids() reports any ids, listing the fetched entities
    whose query id is among them.

    :return: a single CheckResult (even though the annotation says List)
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_for_duplicated_ids_within_seqscape)
    repeated_ids = self._find_duplicated_ids()
    if repeated_ids:
        repeated_entities = []
        for candidate in self.entities_fetched:
            candidate_id = getattr(candidate, self.query_id_type)
            if candidate_id in repeated_ids:
                repeated_entities.append(candidate)
        message = "The following ids: %s are duplicated - entities: %s" % (
            repeated_ids, repeated_entities)
        check_result.error_message = message
        check_result.result = RESULT.FAILURE
    return check_result
def test_group_by_result(self):
    # One SUCCESS and one FAILURE result are expected to end up under their own result key
    succeeded = CheckResult(check_name='Some check1', executed=False, result=RESULT.SUCCESS,
                            severity=SEVERITY.WARNING)
    failed = CheckResult(check_name='Some check1', executed=True, result=RESULT.FAILURE,
                         severity=SEVERITY.IMPORTANT)
    grouped = CheckResultsProcessing.group_by_result([succeeded, failed])
    self.assertDictEqual(grouped, {RESULT.SUCCESS: [succeeded], RESULT.FAILURE: [failed]})
def check_read_permission_exists_for_ss_group( cls, acls) -> List[CheckResult]: """ Checks if any of the ACLs is for an ss group. :param acls: :return: """ # problems = [] check_result_read_permission = CheckResult( check_name=CHECK_NAMES.check_ss_irods_group_read_permission, severity=SEVERITY.WARNING) check_result_ss_group_present = CheckResult( check_name=CHECK_NAMES.check_there_is_ss_irods_group, severity=SEVERITY.WARNING) found_ss_gr_acl = False for acl in acls: if acl.provides_access_for_ss_group(): found_ss_gr_acl = True if not acl.provides_read_permission(): check_result_read_permission.result = RESULT.FAILURE check_result_read_permission.error_message = "ACL found: " + str( acl) break if not found_ss_gr_acl: check_result_ss_group_present.result = RESULT.FAILURE check_result_read_permission.result = RESULT.FAILURE return [ check_result_ss_group_present, check_result_read_permission ]
def test_group_by_executed_when_some_executed(self):
    # An executed and a not executed result are expected under the keys True and False respectively
    ran = CheckResult(check_name="Some check", executed=True, result=RESULT.SUCCESS,
                      severity=SEVERITY.IMPORTANT)
    skipped = CheckResult(check_name="Some check", executed=False, result=None,
                          severity=SEVERITY.IMPORTANT)
    grouped = CheckResultsProcessing.group_by_executed([ran, skipped])
    self.assertDictEqual(grouped, {True: [ran], False: [skipped]})
def format_output_as_json(check_results_by_path):
    """
    Serialise the check results of each file path to a json string, using an encoder class
    built from CheckResult.to_json_mapping().
    :param check_results_by_path: dict - key = str (filepath), value = list[CheckResult]
    :return: json formatted string
    """
    encoder_builder = MappingJSONEncoderClassBuilder(CheckResult, CheckResult.to_json_mapping())
    encoder_class = encoder_builder.build()
    return json.dumps(check_results_by_path, cls=encoder_class)
def validate_fields(self) -> List:
    """
    Run the checksum, npg_qc and target checks of this object.

    The comparison between the checksum in the metadata and the checksum at upload is only executed
    when both "checksum present" checks succeeded.

    :return: list of CheckResults in this order: checksum at upload present, checksum in metadata
             present, checksum comparison, npg_qc field, target field
    """
    upload_check = self.check_checksum_at_upload_present()
    meta_check = self.check_checksum_in_meta_present()

    comparison_check = CheckResult(
        check_name=CHECK_NAMES.check_by_comparison_checksum_in_meta_with_checksum_at_upload)
    both_present = upload_check.result == RESULT.SUCCESS and meta_check.result == RESULT.SUCCESS
    if not both_present:
        comparison_check.executed = False
        comparison_check.result = None
    elif self.checksum_in_meta != self.checksum_at_upload:
        comparison_check.result = RESULT.FAILURE
        comparison_check.error_message = \
            "The checksum in metadata = %s different than checksum at upload = %s" % (
                self.checksum_in_meta, self.checksum_at_upload)
    else:
        comparison_check.result = RESULT.SUCCESS

    return [
        upload_check,
        meta_check,
        comparison_check,
        self.check_npg_qc_field(),
        self.check_target_field(),
    ]
def setUp(self):
    # Fixture: the json dict form of a CheckResult and a decoder class built from its json mapping
    decoder_builder = MappingJSONDecoderClassBuilder(CheckResult, CheckResult.to_json_mapping())
    self.check_result_as_json_dict = dict(
        check_name=_NAME,
        executed=_EXEUCTED,
        result=_RESULT,
        severity=_SEVERITY,
        error_message=_ERROR_MESSAGE,
    )
    self.CheckResultJSONDecoder = decoder_builder.build()
def _check_entities_have_all_types_of_ids(entity_list, mandatory_id_types,
                                          entity_type):
    """
    Fail the check when any entity has a falsy value for one of the mandatory id types.

    Every entity with missing ids is reported; the messages are joined with "; ". The ids that are
    present are listed in the order of mandatory_id_types so that the message is deterministic.

    :param entity_list: the entities to inspect
    :param mandatory_id_types: names of the attributes every entity must have a truthy value for
    :param entity_type: entity type name, used only in the error message
    :return: the CheckResult of this check
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_all_id_types_present)
    error_messages = []
    for entity in entity_list:
        missing_id_types = []
        present_id_vals = []
        for id_type in mandatory_id_types:
            id_value = getattr(entity, id_type)
            if id_value:
                present_id_vals.append(id_value)
            else:
                missing_id_types.append(id_type)
        if missing_id_types:
            error_messages.append('Missing %s %s from %s: %s' % (
                entity_type, missing_id_types, entity_type, present_id_vals))
    if error_messages:
        check_result.error_message = "; ".join(error_messages)
        check_result.result = RESULT.FAILURE
    return check_result
def format_output_as_json(check_results_by_path):
    """
    Serialise the check results of each file path to a json string, using an encoder class
    built from CheckResult.to_json_mapping().
    :param check_results_by_path: dict - key = str (filepath), value = list[CheckResult]
    :return: json formatted string
    """
    json_mapping = CheckResult.to_json_mapping()
    encoder_class = MappingJSONEncoderClassBuilder(CheckResult, json_mapping).build()
    return json.dumps(check_results_by_path, cls=encoder_class)
def check_attributes_have_the_right_frequency(cls, standard_attr_dict, actual_attr_dict):
    """
    Compare the frequency of each attribute in standard_attr_dict with the one in actual_attr_dict.

    :param standard_attr_dict: dict - key = attribute, value = expected frequency
    :param actual_attr_dict: dict - key = attribute, value = actual frequency
    :return: a CheckResult starting as SUCCESS; every missing or mismatching attribute turns it into
             FAILURE and adds an entry to its error_message list
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_attribute_count,
        executed=True,
        result=RESULT.SUCCESS,
        error_message=[])
    for attr, expected_freq in standard_attr_dict.items():
        if attr not in actual_attr_dict:
            problem = 'Missing attribute %s' % attr
        elif expected_freq != actual_attr_dict[attr]:
            problem = "Attribute %s should appear %s times and instead appears %s times" % (
                attr, expected_freq, actual_attr_dict[attr])
        else:
            continue
        check_result.result = RESULT.FAILURE
        check_result.error_message.append(problem)
    return check_result
def check_samples_fetched_by_studies(self):
    """
    Check that every sample entity is among the samples associated with the study entities.

    The check is flagged as not executed when there are no study entities.

    :return: the CheckResult of this check
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_samples_in_irods_same_as_samples_fetched_by_study_from_seqscape)
    if not self.get_entities_by_type('study'):
        check_result.executed = False
        check_result.result = None
        return check_result

    samples_of_studies = set(self.get_all_entities_by_association_by_type('study', 'sample'))
    samples = set(self.get_entities_by_type('sample'))
    if samples.issubset(samples_of_studies):
        return check_result

    misplaced_samples = samples.difference(samples_of_studies)
    study_names = [study.name for study in self.get_entities_by_type('study')]
    misplaced_count = str(len(misplaced_samples))
    misplaced_ids = [(s.name, s.accession_number) for s in misplaced_samples]
    check_result.error_message = (
        "Some samples don't appear under study(s): %s in Sequencescape, "
        "but they appear under this study in iRODS. Number of samples: %s, "
        "and ids: %s" % (study_names, misplaced_count, misplaced_ids))
    check_result.result = RESULT.FAILURE
    return check_result
def check_studies_fetched_by_samples(self):
    """
    Check that the study entities are among the studies associated with the sample entities.

    The check is flagged as not executed when there are no sample entities.

    :return: list holding the single CheckResult of this check
    """
    same_study_for_samples_check = CheckResult(
        check_name=CHECK_NAMES.
        check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples)
    samples = self.get_entities_by_type('sample')
    if not samples:
        same_study_for_samples_check.executed = False
        same_study_for_samples_check.result = None
        return [same_study_for_samples_check]

    studies_by_samples_set = set(
        self.get_all_entities_by_association_by_type('sample', 'study'))
    studies_set = set(self.get_entities_by_type('study'))
    # When studies_set is a subset there is nothing else to report: its difference from
    # studies_by_samples_set is necessarily empty
    if not studies_set.issubset(studies_by_samples_set):
        studies_set_names = [study.name for study in studies_set]
        studies_by_samples_set_names = [
            study.name for study in studies_by_samples_set
        ]
        error_msg = "For the %s given seqscape samples, the studies in iRODS: %s and the studies in Seqscape DISAGREE: %s" % (
            str(len(samples)), studies_set_names, studies_by_samples_set_names)
        same_study_for_samples_check.result = RESULT.FAILURE
        same_study_for_samples_check.error_message = error_msg
    return [same_study_for_samples_check]
def check_attribute_count(
        self, avu_counts: List[AttributeCount]) -> List[CheckResult]:
    """
    Check (severity IMPORTANT) that each attribute appears as often as its AttributeCount demands.

    :param avu_counts: the expectations; each one provides attribute, operator and count
    :return: a single CheckResult (even though the annotation says List); on failure its
             error_message joins the descriptions of all wrong counts with ','
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_attribute_count, severity=SEVERITY.IMPORTANT)
    complaints = []
    for expectation in avu_counts:
        found = self.get_values_count_for_attribute(expectation.attribute)
        limit = expectation.count
        if self._is_true_comparison(found, limit, expectation.operator):
            continue
        complaints.append(
            "attribute %s should appear %s %s times and appears %s" %
            (expectation.attribute, expectation.operator, limit, found))
    if complaints:
        check_result.result = RESULT.FAILURE
        check_result.error_message = ','.join(complaints)
    return check_result
def test_failed_check_results_when_all_successful(self):
    # Three paths with two successful checks each: the failure stats are expected to be empty
    def successful_checks():
        return [
            CheckResult(check_name=name, executed=True, result=RESULT.SUCCESS,
                        severity=SEVERITY.WARNING)
            for name in ('check1', 'check2')
        ]

    paths = ('/seq/1/1.cram', '/seq/2/2.cram', '/seq/3/3.cram')
    check_results_dict = {path: successful_checks() for path in paths}
    stats = CheckResultsProcessing.failed_check_results_stats(check_results_dict)
    self.assertDictEqual(stats, {})
def check_reference(self, desired_ref_name: str) -> List[CheckResult]:
    """
    Check that every reference from self.get_references() contains the desired reference name,
    ignoring case.

    The check is flagged as not executed when there are no references or no desired name; in that
    case error_message is a list of the missing inputs. On failure error_message is a string that
    describes the last mismatching reference.

    :param desired_ref_name: the name expected to appear inside each reference
    :return: a single CheckResult (even though the annotation says List)
    """
    check_result = CheckResult(check_name=CHECK_NAMES.check_desired_reference)
    check_result.error_message = []
    required_inputs = (
        (self.get_references(), "Missing reference from the metadata"),
        (desired_ref_name, "Missing desired reference parameter"),
    )
    for value, complaint in required_inputs:
        if not value:
            check_result.result = None
            check_result.executed = False
            check_result.error_message.append(complaint)
    if check_result.error_message:
        return check_result

    wanted = desired_ref_name.lower()
    for ref in self.get_references():
        if wanted not in ref.lower():
            check_result.result = RESULT.FAILURE
            check_result.error_message = "The desired reference is: %s is different thant the metadata reference: %s" % (
                desired_ref_name, ref)
    return check_result
def check_npg_qc_field(self):
    """
    Fail the check when the npg_qc value is None or is rejected by self._is_npg_qc_valid.

    :return: the CheckResult of this check
    """
    check_npg_qc = CheckResult(check_name=CHECK_NAMES.check_npg_qc_field)
    npg_qc = self.get_npg_qc()
    if npg_qc is None:
        failure_message = "Missing npg_qc field"
    elif not self._is_npg_qc_valid(npg_qc):
        failure_message = "This npg_qc field looks invalid: " + str(npg_qc)
    else:
        return check_npg_qc
    check_npg_qc.result = RESULT.FAILURE
    check_npg_qc.error_message = failure_message
    return check_npg_qc
def check_checksum_at_upload_present(self):
    """
    Check that self.checksum_at_upload holds a value and, when it is a collection of checksums,
    that all of them are identical.

    :return: a CheckResult with result SUCCESS or FAILURE
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_checksum_at_upload_present)
    checksum = self.checksum_at_upload
    if not checksum:
        check_result.result = RESULT.FAILURE
        check_result.error_message = "Missing checksum from ichksum result"
        return check_result

    # isinstance is required here: type(x) is never the Iterable class itself.
    # A plain string is iterable too, but it is one checksum, not a collection of them.
    is_checksum_collection = (isinstance(checksum, Iterable)
                              and not isinstance(checksum, (str, bytes)))
    if is_checksum_collection and len(set(checksum)) > 1:
        check_result.result = RESULT.FAILURE
        check_result.error_message = "Different checksum values at upload (ichksum)"
    else:
        check_result.result = RESULT.SUCCESS
    return check_result
def check_target_field(self):
    """
    Fail the check when the target value is None or is rejected by self._is_target_valid.

    :return: the CheckResult of this check
    """
    check_target_field = CheckResult(check_name=CHECK_NAMES.check_target_field)
    target = self.get_target()
    if target is None:
        failure_message = "Missing target field"
    elif not self._is_target_valid(target):
        failure_message = "The target field looks invalid: " + str(target)
    else:
        return check_target_field
    check_target_field.result = RESULT.FAILURE
    check_target_field.error_message = failure_message
    return check_target_field
def validate_fields(self):
    """
    Validate the iRODS zone and the permission of this object; both checks have severity WARNING.

    :return: list of two CheckResults - the zone check first, the permission check second
    """
    zone_check = CheckResult(
        check_name=CHECK_NAMES.check_irods_zone_within_acl, severity=SEVERITY.WARNING)
    if not self._is_irods_zone_valid(self.zone):
        zone_check.result = RESULT.FAILURE
        zone_check.error_message = "The iRODS zone seems wrong: %s in acl = %s" % (
            str(self.zone), str(self))

    permission_check = CheckResult(
        check_name=CHECK_NAMES.check_irods_permission_within_acl, severity=SEVERITY.WARNING)
    if not self._is_permission_valid(self.permission):
        permission_check.result = RESULT.FAILURE
        permission_check.error_message = "The iRODS permission seems wrong: %s in acl = %s" % (
            str(self.permission), str(self))

    return [zone_check, permission_check]
def validate_fields(self):
    """
    Validate the iRODS zone and the permission of this object; both checks have severity WARNING.

    :return: list of two CheckResults - the zone check first, the permission check second
    """
    outcomes = []

    zone_outcome = CheckResult(check_name=CHECK_NAMES.check_irods_zone_within_acl, severity=SEVERITY.WARNING)
    zone_is_valid = self._is_irods_zone_valid(self.zone)
    if not zone_is_valid:
        zone_outcome.result = RESULT.FAILURE
        zone_outcome.error_message = "The iRODS zone seems wrong: " + str(self.zone) + " in acl = " + str(self)
    outcomes.append(zone_outcome)

    permission_outcome = CheckResult(check_name=CHECK_NAMES.check_irods_permission_within_acl,
                                     severity=SEVERITY.WARNING)
    permission_is_valid = self._is_permission_valid(self.permission)
    if not permission_is_valid:
        permission_outcome.result = RESULT.FAILURE
        permission_outcome.error_message = "The iRODS permission seems wrong: " + str(
            self.permission) + " in acl = " + str(self)
    outcomes.append(permission_outcome)

    return outcomes
def validate_fields(self):
    """
    Validate the checksum and the replica number of this object.

    :return: list of two CheckResults - the checksum check first, the replica number check second
    """
    checksum_outcome = CheckResult(check_name=CHECK_NAMES.check_replica_checksum_valid,
                                   severity=SEVERITY.IMPORTANT)
    try:
        checksum_is_valid = self._is_checksum_valid(self.checksum)
    except TypeError:
        # A TypeError raised by the validator is reported exactly like an invalid checksum
        checksum_is_valid = False
    if not checksum_is_valid:
        checksum_outcome.result = RESULT.FAILURE
        checksum_outcome.error_message = "The checksum looks invalid: " + str(self.checksum)

    replica_nr_outcome = CheckResult(check_name=CHECK_NAMES.check_replica_number, severity=SEVERITY.WARNING)
    if not self._is_replica_nr_valid(self.replica_nr):
        replica_nr_outcome.result = RESULT.FAILURE
        replica_nr_outcome.error_message = "The replica number looks invalid: " + str(self.replica_nr)

    return [checksum_outcome, replica_nr_outcome]
def check_studies_fetched_by_samples(self):
    """
    Check that the study entities are among the studies associated with the sample entities.

    The check is flagged as not executed when there are no sample entities.

    :return: list holding the single CheckResult of this check
    """
    same_study_for_samples_check = CheckResult(
        check_name=CHECK_NAMES.check_studies_in_irods_with_studies_in_seqscape_fetched_by_samples)
    samples = self.get_entities_by_type('sample')
    if not samples:
        same_study_for_samples_check.executed = False
        same_study_for_samples_check.result = None
        return [same_study_for_samples_check]

    studies_by_samples_set = set(self.get_all_entities_by_association_by_type('sample', 'study'))
    studies_set = set(self.get_entities_by_type('study'))
    # When studies_set is a subset there is nothing else to report: its difference from
    # studies_by_samples_set is necessarily empty
    if not studies_set.issubset(studies_by_samples_set):
        studies_set_names = [study.name for study in studies_set]
        studies_by_samples_set_names = [study.name for study in studies_by_samples_set]
        error_msg = "For the %s given seqscape samples, the studies in iRODS: %s and the studies in Seqscape DISAGREE: %s" % (
            str(len(samples)), studies_set_names, studies_by_samples_set_names)
        same_study_for_samples_check.result = RESULT.FAILURE
        same_study_for_samples_check.error_message = error_msg
    return [same_study_for_samples_check]
def check_all_replicas_have_same_checksum(cls, replicas) -> CheckResult:
    """
    Check (severity IMPORTANT) that every replica has the same checksum as the first one.

    The check is flagged as not executed when there are no replicas.

    :param replicas: the replicas to compare; each one provides a checksum attribute
    :return: the CheckResult of this check
    """
    result = CheckResult(
        check_name=CHECK_NAMES.check_all_replicas_same_checksum, severity=SEVERITY.IMPORTANT)
    if not replicas:
        result.executed = False
        result.error_message = ["No replicas to compare with."]
        result.result = None
        return result

    reference_replica = replicas[0]
    complaints = []
    for replica in replicas:
        if replica.checksum == reference_replica.checksum:
            continue
        result.result = RESULT.FAILURE
        complaints.append(
            "Replica: " + str(replica) + " has different checksum than replica: " + str(reference_replica))
    if complaints:
        result.error_message = ''.join(complaints)
    return result
def checksum_comparison_check(self):
    """
    Compare the checksum in the metadata with the checksum at upload.

    The check is flagged as not executed when either checksum is missing; error_message is then a
    list naming what is missing. When both are present and differ, the check fails and
    error_message is a string.

    :return: the CheckResult of this check
    """
    check_result = CheckResult(
        check_name=CHECK_NAMES.check_by_comparison_checksum_in_meta_with_checksum_at_upload,
        error_message=[])
    required_checksums = (
        (self.checksum_at_upload, "Missing ichecksum result."),
        (self.checksum_in_meta, "Missing checksum from metadata"),
    )
    can_compare = True
    for checksum, complaint in required_checksums:
        if not checksum:
            check_result.executed = False
            check_result.error_message.append(complaint)
            can_compare = False
    if can_compare and self.checksum_in_meta != self.checksum_at_upload:
        check_result.result = RESULT.FAILURE
        check_result.error_message = \
            "The checksum in metadata = %s different than checksum at upload = %s" % (
                self.checksum_in_meta, self.checksum_at_upload)
    return check_result
def test_failed_check_results_stats(self):
    # Two paths failing the same three checks: every check name is expected to be counted twice
    check_names = ('Some check1', 'Some check2', 'Some check3')

    def failed_checks():
        return [
            CheckResult(check_name=name, executed=True, result=RESULT.FAILURE,
                        severity=SEVERITY.WARNING)
            for name in check_names
        ]

    check_results_dict = {
        '/seq/123/123.cram': failed_checks(),
        '/seq/456/456.cram': failed_checks(),
    }
    expected_counter = {name: 2 for name in check_names}
    stats = CheckResultsProcessing.failed_check_results_stats(check_results_dict)
    self.assertDictEqual(expected_counter, stats)
def setUp(self):
    # Fixture: a CheckResult built from the module constants and an encoder class built from its json mapping
    constructor_kwargs = dict(
        executed=_EXEUCTED,
        result=_RESULT,
        severity=_SEVERITY,
        error_message=_ERROR_MESSAGE,
    )
    self.check_result = CheckResult(_NAME, **constructor_kwargs)
    encoder_builder = MappingJSONEncoderClassBuilder(CheckResult, CheckResult.to_json_mapping())
    self.CheckResultJSONEncoder = encoder_builder.build()