def check_nucleotides(self, check_set=None, eval_id=None,
                      success="correct", fail="error", eval_def=None):
    """Check if all nucleotides in the sequence are expected.

    Every distinct element of ``self.seq`` that is absent from
    ``check_set`` is reported as unexpected. The outcome is recorded
    through ``self.set_eval()``.

    :param check_set:
        Set of reference nucleotides. If omitted, an empty set is used,
        so every nucleotide in the sequence is reported as unexpected.
    :type check_set: set
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if no unexpected nucleotides are found.
    :type success: str
    :param fail: Status assigned if unexpected nucleotides are found.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    # When Biopython SeqIO parses the GenBank record, it automatically
    # determines that it is a DNA sequence. It assigns the Seq object
    # alphabet as IUPACAmbiguousDNA. The alphabet could be coerced
    # to a different alphabet, and then tested using the
    # Bio.Alphabet._verify_alphabet() function. Since this is a private
    # function though, it is not clear how stable/reliable it is.
    # Instead, the set of permitted nucleotides is passed explicitly
    # to this method.

    # A fresh set per call avoids sharing one mutable default object.
    if check_set is None:
        check_set = set()

    unexpected = set(self.seq) - check_set
    if unexpected:
        # Sorted so the report is identical from run to run; plain set
        # iteration order of strings varies with hash randomization.
        nes_string = basic.join_strings(sorted(unexpected), delimiter=", ")
        result = ("There are unexpected nucleotides in the sequence: "
                  f"{nes_string}.")
        status = fail
    else:
        result = "There are no unexpected nucleotides in the sequence."
        status = success

    definition = "Check if all nucleotides in the sequence are expected."
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_authors(self, check_set=None, expect=True, eval_id=None,
                  success="correct", fail="error", eval_def=None):
    """Check author list.

    Evaluates whether at least one author in the list of authors
    is present in a set of reference authors. ``self.authors`` is
    lower-cased and split on semicolons, commas and spaces before the
    comparison, so ``check_set`` members only match if they are
    lower-case single tokens.

    :param check_set:
        Set of reference authors. If omitted, an empty set is used.
    :type check_set: set
    :param expect:
        Indicates whether at least one author in the list of authors
        is expected to be present in the check set.
    :type expect: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the outcome matches ``expect``.
    :type success: str
    :param fail: Status assigned if the outcome does not match ``expect``.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    # A fresh set per call avoids sharing one mutable default object.
    if check_set is None:
        check_set = set()

    # Normalize every delimiter to a comma, then tokenize.
    authors = self.authors.lower()
    authors = authors.replace(";", ",")
    authors = authors.replace(" ", ",")
    authors_set = {token.strip() for token in authors.split(",")}

    mutual_authors_set = authors_set & check_set
    if mutual_authors_set:
        # Sorted so the report is identical from run to run.
        mas_string = basic.join_strings(sorted(mutual_authors_set),
                                        delimiter=", ")
        result = f"The following authors are listed: {mas_string}. This is "
        if expect:
            result = result + "expected."
            status = success
        else:
            result = result + "not expected."
            status = fail
    else:
        missing_set = check_set - authors_set
        missing_string = basic.join_strings(sorted(missing_set),
                                            delimiter=", ")
        result = ("The following authors are not "
                  f"listed: {missing_string}. This is ")
        if expect:
            result = result + "not expected."
            status = fail
        else:
            result = result + "expected."
            status = success

    definition = "Check authorship."
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_genome_pair_dict(self, key, expect=True, eval_id=None,
                           success="correct", fail="error", eval_def=None):
    """Check if a genome_pair is present in the genome_pair dictionary.

    :param key: Value tested for membership in ``self.genome_pair_dict``.
    :type key: str
    :param expect:
        Indicates whether the key is expected to be present in the
        genome_pair dictionary.
    :type expect: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if presence matches ``expect``.
    :type success: str
    :param fail: Status assigned if presence does not match ``expect``.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    is_present = key in self.genome_pair_dict
    if is_present:
        result = f"The '{key}' genome_pair is present."
        status = success if expect else fail
    else:
        result = f"The '{key}' genome_pair is not present."
        status = fail if expect else success

    definition = basic.join_strings(
        ["Check if a genome_pair is present.", eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_cds_end_orient_ids(self, eval_id=None, success="correct",
                             fail="error", eval_def=None):
    """Check for duplicate transcription end-orientation coordinates.

    Duplicated transcription end-orientation coordinates may represent
    unintentional duplicate CDS features with slightly different start
    coordinates. The check fails when
    ``self._cds_duplicate_end_orient_ids`` is non-empty.

    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if no duplicates are recorded.
    :type success: str
    :param fail: Status assigned if duplicates are recorded.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    duplicate_count = len(self._cds_duplicate_end_orient_ids)
    if duplicate_count == 0:
        status = success
        result = ("All CDS features contain unique orientation and "
                  "transcription end coordinate information.")
    else:
        status = fail
        result = ("There are multiple CDS features with the same "
                  "transcription end coordinate and orientation.")

    base_definition = ("Check whether CDS features can be uniquely "
                       "identified by their orientation and transcription end "
                       "coordinate.")
    definition = basic.join_strings([base_definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_eval_flags(self, expect=True, eval_id=None, success="correct",
                     fail="error", eval_def=None):
    """Check that the eval_flags is valid.

    The check succeeds when ``self.eval_flags`` is non-empty and data
    is expected, or when it is empty and no data is expected.

    :param expect:
        Indicates whether the eval_flags is expected to contain data.
    :type expect: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the content matches ``expect``.
    :type success: str
    :param fail: Status assigned if the content does not match ``expect``.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    flag_count = len(self.eval_flags)
    prefix = f"There are {flag_count} eval flags present, which is "

    as_expected = ((flag_count > 0 and expect)
                   or (flag_count == 0 and not expect))
    if as_expected:
        status = success
        result = prefix + "expected."
    else:
        status = fail
        result = prefix + "not expected."

    definition = basic.join_strings(
        ["Check if there are eval flags.", eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_compatible_cluster_and_subcluster(self, eval_id=None,
                                            success="correct", fail="error",
                                            eval_def=None):
    """Check compatibility of cluster and subcluster attributes.

    Compatibility is decided by ``basic.compare_cluster_subcluster()``
    applied to ``self.cluster`` and ``self.subcluster``.

    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the two values are compatible.
    :type success: str
    :param fail: Status assigned if the two values are not compatible.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    prefix = (f"The Cluster is '{self.cluster}', "
              f"the Subcluster is '{self.subcluster}', and they are ")
    compatible = basic.compare_cluster_subcluster(self.cluster,
                                                  self.subcluster)
    if compatible:
        status = success
        result = prefix + "compatible."
    else:
        status = fail
        result = prefix + "not compatible."

    definition = basic.join_strings(
        ["Check for compatibility between Cluster and Subcluster.",
         eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_subcluster_structure(self, eval_id=None, success="correct",
                               fail="error", eval_def=None):
    """Check whether the subcluster attribute is structured appropriately.

    A subcluster of ``"none"`` or ``""`` is not evaluated and is given
    the status ``"untested"``. Otherwise the value is split with
    ``basic.split_string()``; the left part must be alphabetic and the
    right part must be numeric.

    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the structure is correct.
    :type success: str
    :param fail: Status assigned if the structure is incorrect.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    result = f"The Subcluster is '{self.subcluster}'. "

    if self.subcluster == "none" or self.subcluster == "":
        result += "It is empty."
        status = "untested"
    else:
        result += "It is structured "
        alpha_part, digit_part = basic.split_string(self.subcluster)
        if alpha_part.isalpha() and digit_part.isdigit():
            result += "correctly."
            status = success
        else:
            result += "incorrectly."
            status = fail

    definition = basic.join_strings(
        ["Check if the Subcluster attribute is structured correctly.",
         eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_compatible_type_and_data_retain(self, eval_id=None,
                                          success="correct", fail="error",
                                          eval_def=None):
    """Check if the ticket type and data_retain are compatible.

    If the ticket type is 'add', then the data_retain set is not
    expected to have any data. Every other combination passes.

    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the combination is compatible.
    :type success: str
    :param fail: Status assigned if the combination is not compatible.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    retained_count = len(self.data_retain)
    prefix = (f"The ticket type is '{self.type}' and "
              f"there are {retained_count} values in the database "
              "that are set to be retained, which is ")

    incompatible = self.type == "add" and retained_count > 0
    if incompatible:
        status = fail
        result = prefix + "not expected."
    else:
        status = success
        result = prefix + "expected."

    base_definition = ("Check if the ticket type and data_retain setting "
                       "are compatible.")
    definition = basic.join_strings([base_definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_statements(self, execute_result, execute_msg, eval_id=None,
                     success="correct", fail="error", eval_def=None):
    """Check if MySQL statements were successfully executed.

    :param execute_result:
        Indication of whether the MySQL statements were successfully
        executed. A value equal to 0 is treated as success; any other
        value is treated as failure.
    :type execute_result: int
    :param execute_msg:
        Description of the MySQL statement execution result. It is
        recorded unchanged as the evaluation result.
    :type execute_msg: str
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if ``execute_result`` equals 0.
    :type success: str
    :param fail: Status assigned otherwise.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    status = success if execute_result == 0 else fail
    definition = basic.join_strings(
        ["Check if MySQL statements were successfully executed.", eval_def])
    self.set_eval(eval_id, definition, execute_msg, status)
def compare_two_attributes(self, attribute1, attribute2, expect_same=False,
                           eval_id=None, success="correct", fail="error",
                           eval_def=None):
    """Determine if two attributes are the same.

    If either name cannot be looked up on this object, no comparison
    is made and the evaluation is recorded with status ``"untested"``.

    :param attribute1: First attribute to compare.
    :type attribute1: str
    :param attribute2: Second attribute to compare.
    :type attribute2: str
    :param expect_same:
        Indicates whether the two attribute values are expected
        to be the same.
    :type expect_same: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the outcome matches ``expect_same``.
    :type success: str
    :param fail: Status assigned if the outcome does not match
        ``expect_same``.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    # Catch only what getattr() raises for a missing (AttributeError)
    # or non-string (TypeError) name, so that KeyboardInterrupt,
    # SystemExit and unrelated errors are no longer swallowed.
    try:
        value1 = getattr(self, attribute1)
        value2 = getattr(self, attribute2)
    except (AttributeError, TypeError):
        valid = False
    else:
        valid = True

    if valid:
        actual_same = bool(value1 == value2)

        # Values are truncated to keep the report readable.
        v1_short = basic.truncate_value(str(value1), 30, "...")
        v2_short = basic.truncate_value(str(value2), 30, "...")
        result = (f"The '{attribute1}' attribute contains: '{v1_short}'. "
                  f"The '{attribute2}' attribute contains: '{v2_short}'. "
                  "These two values are ")
        result = result + ("identical, " if actual_same else "different, ")

        if actual_same == bool(expect_same):
            result = result + "as expected."
            status = success
        else:
            result = result + "which is not expected."
            status = fail
    else:
        result = (f"'{attribute1}' and/or '{attribute2}' is "
                  "not a valid field to be compared.")
        status = "untested"

    definition = (f"Compare values of '{attribute1}' and "
                  f"'{attribute2}' attributes.")
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_valid_data_source(self, ref_set_attr, check_set, eval_id=None,
                            success="correct", fail="error", eval_def=None):
    """Check that the values in the specified attribute are valid.

    :param ref_set_attr:
        Name of the data_dict in the ticket to be evaluated
        (data_add, data_retain, data_retrieve, data_parse). Any other
        name is reported as an invalid attribute with the ``fail``
        status.
    :type ref_set_attr: str
    :param check_set: Set of valid field names.
    :type check_set: set
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if every value is in ``check_set``.
    :type success: str
    :param fail: Status assigned if any value is not in ``check_set``,
        or if ``ref_set_attr`` is not one of the supported names.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    # Only these attributes may be evaluated; the whitelist prevents
    # arbitrary attribute access through ref_set_attr.
    valid_attrs = ("data_add", "data_retain", "data_retrieve", "data_parse")
    if ref_set_attr in valid_attrs:
        ref_set = getattr(self, ref_set_attr)
    else:
        ref_set = None

    if ref_set is not None:
        invalid_values = ref_set - check_set
        # The prefix was previously built but never used, leaving the
        # result as a fragment such as "populated correctly.".
        msg = f"The '{ref_set_attr}' field is "
        if not invalid_values:
            result = msg + "populated correctly."
            status = success
        else:
            # Sorted so the report is identical from run to run.
            invalid_string = basic.join_strings(sorted(invalid_values),
                                                delimiter=", ")
            result = (msg +
                      "not populated correctly. The following "
                      f"values are not permitted in '{ref_set_attr}': "
                      f"{invalid_string}.")
            status = fail
    else:
        result = f"'{ref_set_attr}' is not a valid attribute to be evaluated."
        status = fail

    definition = f"Check if {ref_set_attr} field is correctly populated."
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_magnitude(self, attribute, expect, ref_value, eval_id=None,
                    success="correct", fail="error", eval_def=None):
    """Check that the magnitude of a numerical attribute is valid.

    The attribute value is compared to ``ref_value`` and the observed
    relationship (">", "=" or "<") is compared to ``expect``. If the
    attribute cannot be looked up, the evaluation is recorded with
    status ``"untested"``.

    :param attribute: Name of the object attribute to evaluate.
    :type attribute: str
    :param expect:
        Comparison symbol indicating direction of magnitude (>, =, <).
        Any other value can never match, so the check fails.
    :type expect: str
    :param ref_value: Numerical value for comparison.
    :type ref_value: int, float, datetime
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the relationship matches ``expect``.
    :type success: str
    :param fail: Status assigned if the relationship does not match
        ``expect``.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    # Catch only what getattr() raises for a missing (AttributeError)
    # or non-string (TypeError) name, so that KeyboardInterrupt,
    # SystemExit and unrelated errors are no longer swallowed.
    try:
        query_value = getattr(self, attribute)
    except (AttributeError, TypeError):
        valid = False
    else:
        valid = True

    if valid:
        result = f"The {attribute} value {query_value} is "
        if query_value > ref_value:
            compare = ">"
            result = result + "greater than "
        elif query_value == ref_value:
            compare = "="
            result = result + "equal to "
        else:
            compare = "<"
            result = result + "less than "
        result = result + f"{ref_value}, which is "

        if compare == expect:
            result = result + "expected."
            status = success
        else:
            result = result + "not expected."
            status = fail
    else:
        result = f"'{attribute}' is not a valid attribute to be evaluated."
        status = "untested"

    definition = f"Check the magnitude of the '{attribute}' attribute."
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_attribute(self, attribute, check_set, expect=False, eval_id=None,
                    success="correct", fail="error", eval_def=None):
    """Check that the attribute value is valid.

    Validity is decided by ``basic.check_value_expected_in_set()``.
    If the attribute cannot be looked up, the evaluation is recorded
    with status ``"untested"``.

    :param attribute: Name of the object attribute to evaluate.
    :type attribute: str
    :param check_set: Set of reference ids.
    :type check_set: set
    :param expect:
        Indicates whether the attribute value is expected to be present
        in the check set.
    :type expect: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Default status if the outcome is a success.
    :type success: str
    :param fail: Default status if the outcome is not a success.
    :type fail: str
    :param eval_def: Description of the evaluation.
    :type eval_def: str
    """
    # Catch only what getattr() raises for a missing (AttributeError)
    # or non-string (TypeError) name, so that KeyboardInterrupt,
    # SystemExit and unrelated errors are no longer swallowed.
    try:
        value1 = getattr(self, attribute)
    except (AttributeError, TypeError):
        valid = False
    else:
        valid = True

    if valid:
        # The value is truncated to keep the report readable.
        value1_short = basic.truncate_value(str(value1), 30, "...")
        result = f"The {attribute} value '{value1_short}' is "

        is_valid = basic.check_value_expected_in_set(
            value1, check_set, expect)
        if is_valid:
            result = result + "valid."
            status = success
        else:
            result = result + "not valid."
            status = fail
    else:
        result = f"'{attribute}' is not a valid attribute to be evaluated."
        status = "untested"

    definition = f"Check the value of the '{attribute}' attribute."
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def compare_date(self, expect, eval_id=None, success="correct",
                 fail="error", eval_def=None):
    """Compare the date of each genome.

    ``self.genome1`` is reported as the query genome and
    ``self.genome2`` as the reference genome. If ``expect`` is not one
    of the supported values, no comparison is made and the evaluation
    is recorded with status ``"untested"``.

    :param expect:
        Is the first genome expected to be "newer", "equal", or "older"
        than the second genome.
    :type expect: str
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if the relationship matches ``expect``.
    :type success: str
    :param fail: Status assigned if the relationship does not match
        ``expect``.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    if expect in {"newer", "equal", "older"}:
        date1 = self.genome1.date
        date2 = self.genome2.date
        if date1 > date2:
            actual = "newer"
            phrase = "newer than"
        elif date1 == date2:
            actual = "equal"
            phrase = "equal to"
        else:
            actual = "older"
            phrase = "older than"

        # Each sentence ends with a space so that they do not run
        # together; the phrases above likewise include the space that
        # was missing from "newerthan", "equalto" and "olderthan".
        msg = (f"The query genome '{self.genome1.id}' date "
               f"is '{date1}'. "
               f"The reference genome '{self.genome2.id}' date "
               f"is '{date2}'. "
               f"The date of query genome is {phrase} the "
               "date of the reference genome, which is ")

        if actual == expect:
            result = msg + "expected."
            status = success
        else:
            result = msg + "not expected."
            status = fail
    else:
        result = f"'{expect}' is an invalid comparison."
        status = "untested"

    definition = "Compare the date of both genomes."
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_genome_dict(self, key, expect=True, eval_id=None,
                      success="correct", fail="error", eval_def=None):
    """Check if a genome is present in the genome dictionary.

    :param key:
        The value to be evaluated if it is a valid key
        in the genome dictionary.
    :type key: str
    :param expect:
        Indicates whether the key is expected to be a valid key
        in the genome dictionary.
    :type expect: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Default status if the outcome is a success.
    :type success: str
    :param fail: Default status if the outcome is not a success.
    :type fail: str
    :param eval_def: Description of the evaluation.
    :type eval_def: str
    """
    is_present = key in self.genome_dict
    if is_present:
        result = f"The '{key}' genome is present."
        status = success if expect else fail
    else:
        result = f"The '{key}' genome is not present."
        status = fail if expect else success

    definition = basic.join_strings(
        ["Check if a genome is present.", eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_ticket(self, eval_id=None, success="correct", fail="error",
                 eval_def=None):
    """Check for whether a Ticket object is present.

    The check succeeds when ``self.ticket`` is not None, in which case
    the ticket's id, type and phage_id are included in the result.

    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if a ticket is present.
    :type success: str
    :param fail: Status assigned if no ticket is present.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    if self.ticket is None:
        status = fail
        result = "A ticket is not present."
    else:
        tkt = self.ticket
        status = success
        result = ("A ticket is present. "
                  f"ID: {tkt.id}. "
                  f"Type: {tkt.type}. "
                  f"PhageID: {tkt.phage_id}.")

    definition = basic.join_strings(
        ["Check if a ticket is present.", eval_def])
    self.set_eval(eval_id, definition, result, status)
def compare_attribute(self, attribute, expect_same=False, eval_id=None,
                      success="correct", fail="error", eval_def=None):
    """Compare values of the specified attribute in each genome.

    The attribute is read from ``self.genome1`` and ``self.genome2``.
    If it cannot be looked up on either object, no comparison is made
    and the evaluation is recorded with status ``"untested"``.

    :param attribute: Name of the genome attribute to evaluate.
    :type attribute: str
    :param expect_same:
        Indicates whether the two attribute values are expected to be
        the same.
    :type expect_same: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Default status if the outcome is a success.
    :type success: str
    :param fail: Default status if the outcome is not a success.
    :type fail: str
    :param eval_def: Description of the evaluation.
    :type eval_def: str
    """
    # Catch only what getattr() raises for a missing (AttributeError)
    # or non-string (TypeError) name, so that KeyboardInterrupt,
    # SystemExit and unrelated errors are no longer swallowed.
    try:
        value1 = getattr(self.genome1, attribute)
        value2 = getattr(self.genome2, attribute)
    except (AttributeError, TypeError):
        valid = False
    else:
        valid = True

    if valid:
        actual_same = bool(value1 == value2)

        # Values are truncated to keep the report readable.
        v1_short = basic.truncate_value(str(value1), 30, "...")
        v2_short = basic.truncate_value(str(value2), 30, "...")
        # Each fragment carries exactly one trailing space, which
        # removes the doubled space before "contains".
        result = (
            f"The first genome is ID: {self.genome1.id}, "
            f"Type: {self.genome1.type}. The '{attribute}' attribute "
            f"contains: '{v1_short}'. "
            f"The second genome is ID: {self.genome2.id}, "
            f"Type: {self.genome2.type}. The '{attribute}' attribute "
            f"contains: '{v2_short}'. These two values are ")
        result = result + ("identical, " if actual_same else "different, ")

        if actual_same == bool(expect_same):
            result = result + "as expected."
            status = success
        else:
            result = result + "which is not expected."
            status = fail
    else:
        result = f"'{attribute}' is not a valid field to be compared."
        status = "untested"

    definition = (f"Compare values of the '{attribute}' attribute "
                  "in each genome.")
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)
def check_feature_coordinates(self, use_cds=False, use_trna=False,
                              use_tmrna=False, other=None, strand=False,
                              eval_id=None, success="correct",
                              fail="error", eval_def=None):
    """Identify nested, duplicated, or partially-duplicated features.

    The selected features are sorted by (start, stop) and each feature
    is compared only with the one that follows it in that order. When
    ``strand`` is set, forward and reverse features are sorted and
    compared as two separate groups.

    :param use_cds:
        Indicates whether CDS features should be included.
    :type use_cds: bool
    :param use_trna:
        Indicates whether tRNA features should be included.
    :type use_trna: bool
    :param use_tmrna:
        Indicates whether tmRNA features should be included.
    :type use_tmrna: bool
    :param other: List of features that should be included.
    :type other: list
    :param strand:
        Indicates if feature orientation should be included.
    :type strand: bool
    :param eval_id: Unique identifier for the evaluation.
    :type eval_id: str
    :param success: Status assigned if no coordinate conflict is found.
    :type success: str
    :param fail: Status assigned if any coordinate conflict is found.
    :type fail: str
    :param eval_def: Description of the evaluation, joined to the
        default definition.
    :type eval_def: str
    """
    # Collect the requested features. A list keeps the type names in a
    # fixed order so the report is identical from run to run.
    features = []
    ftr_types = []
    if use_cds:
        ftr_types.append("cds")
        features.extend(self.cds_features)
    if use_trna:
        ftr_types.append("trna")
        features.extend(self.trna_features)
    if use_tmrna:
        ftr_types.append("tmrna")
        features.extend(self.tmrna_features)
    if other is not None:
        ftr_types.append("other")
        features.extend(other)

    # Optionally split the features by orientation.
    if strand:
        s_info = "were"
        forward_features = []
        reverse_features = []
        for feature in features:
            short_strand = basic.reformat_strand(feature.orientation,
                                                 format="fr_short")
            if short_strand == "f":
                forward_features.append(feature)
            else:
                reverse_features.append(feature)
        feature_groups = [forward_features, reverse_features]
    else:
        s_info = "were not"
        feature_groups = [features]

    ft_string = basic.join_strings(ftr_types, delimiter=", ")
    # The second sentence reports whether grouping was applied
    # (s_info); it previously repeated the feature types instead.
    result = (
        f"The following types of features were evaluated: {ft_string}. "
        f"Features {s_info} separately grouped "
        "by orientation for evaluation. ")

    msgs = ["There are one or more errors with the feature coordinates."]
    for group in feature_groups:
        ordered = sorted(group, key=attrgetter("start", "stop"))
        for current, following in zip(ordered, ordered[1:]):
            ftrs = (f"Feature1 ID: {current.id}, "
                    f"start coordinate: {current.start}, "
                    f"stop coordinate: {current.stop}, "
                    f"orientation: {current.orientation}. "
                    f"Feature2 ID: {following.id}, "
                    f"start coordinate: {following.start}, "
                    f"stop coordinate: {following.stop}, "
                    f"orientation: {following.orientation}. ")

            if (current.start == following.start
                    and current.stop == following.stop):
                msgs.append(ftrs)
                msgs.append("Feature1 and Feature2 contain identical "
                            "start and stop coordinates.")

            # To identify nested features, the following tests
            # avoid false errors due to genes that may wrap around the
            # genome.
            elif (current.start < following.start
                    and current.start < following.stop
                    and current.stop > following.start
                    and current.stop > following.stop):
                msgs.append(ftrs)
                msgs.append("Feature2 is nested within Feature1.")

            # Two reverse features sharing a 'start' value are
            # reported as sharing a stop coordinate.
            elif (current.start == following.start
                    and basic.reformat_strand(
                        current.orientation, format="fr_short") == "r"
                    and basic.reformat_strand(
                        following.orientation, format="fr_short") == "r"):
                msgs.append(ftrs)
                msgs.append(("Feature1 and Feature2 contain "
                             "identical stop coordinates."))

            # Two forward features sharing a 'stop' value.
            elif (current.stop == following.stop
                    and basic.reformat_strand(
                        current.orientation, format="fr_short") == "f"
                    and basic.reformat_strand(
                        following.orientation, format="fr_short") == "f"):
                msgs.append(ftrs)
                msgs.append(("Feature1 and Feature2 contain "
                             "identical stop coordinates."))

    # msgs always holds the header line; anything more is a conflict.
    if len(msgs) > 1:
        result = result + " ".join(msgs)
        status = fail
    else:
        result = result + "The feature coordinates are correct."
        status = success

    definition = ("Check if there are any feature coordinate conflicts.")
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)