def set_seqfeature(self):
    """
    Build a SeqFeature object and store it in the `seqfeature` attribute.

    The feature location is derived from `start`, `stop` and
    `coordinate_format`; the strand is derived from `orientation`; the
    qualifiers are taken from `get_qualifiers()`.

    :return:
    """
    # SeqFeature locations use 0-based half-open coordinates.
    left, right = basic.reformat_coordinates(
        self.start, self.stop, self.coordinate_format, "0_half_open")

    # SeqFeature expects a numeric strand (-1, 1) rather than ("R", "F").
    numeric_strand = basic.reformat_strand(self.orientation, "numeric")

    if self.start <= self.stop:
        # Standard gene: a single contiguous location.
        location = FeatureLocation(left, right)
    else:
        # Wrap-around gene (stop < start): one part runs to the end of
        # the genome, the other part resumes from the genome origin.
        tail_part = FeatureLocation(left, self.genome_length)
        head_part = FeatureLocation(0, right)
        location = CompoundLocation([tail_part, head_part])

    self.seqfeature = SeqFeature(location, strand=numeric_strand,
                                 type=self.type)

    # Attach the feature qualifiers.
    self.seqfeature.qualifiers = self.get_qualifiers()
def set_orientation(self, value, fmt, capitalize=False):
    """
    Store the orientation, converted to the requested format.

    The conversion is delegated to `basic.reformat_strand`.

    :param value: orientation value
    :type value: int or str
    :param fmt: how orientation should be formatted
    :type fmt: str
    :param capitalize: whether to capitalize the first letter of orientation
    :type capitalize: bool
    :return:
    """
    reformatted = basic.reformat_strand(value, fmt, capitalize)
    self.orientation = reformatted
def get_begin_end(self):
    """
    Return the feature coordinates in transcription begin-end order.

    For a forward-oriented feature this is (start, stop); for a
    reverse-oriented feature it is (stop, start). Any other orientation
    yields (-1, -1).

    :return: (begin, end)
    """
    # Work on an fr_long copy of the orientation; the attribute itself
    # is left untouched.
    direction = basic.reformat_strand(self.orientation, "fr_long")

    if direction == "forward":
        # Rightward transcribed gene
        return self.start, self.stop

    if direction == "reverse":
        # Leftward transcribed gene
        return self.stop, self.start

    # Unexpected orientation
    return -1, -1
def check_feature_coordinates(self, use_cds=False, use_trna=False,
                              use_tmrna=False, other=None, strand=False,
                              eval_id=None, success="correct",
                              fail="error", eval_def=None):
    """Identify nested, duplicated, or partially-duplicated features.

    The selected features are sorted by (start, stop) and each feature
    is compared only with the feature that immediately follows it in
    that order. The outcome is recorded through `set_eval()`.

    :param use_cds:
        Indicates whether CDS features should be evaluated.
    :type use_cds: bool
    :param use_trna:
        Indicates whether tRNA features should be evaluated.
    :type use_trna: bool
    :param use_tmrna:
        Indicates whether tmRNA features should be evaluated.
    :type use_tmrna: bool
    :param other: List of additional features that should be included.
    :type other: list
    :param strand:
        Indicates if features should be grouped by orientation, so that
        forward and reverse features are evaluated separately.
    :type strand: bool
    :param eval_id: same as for check_attribute().
    :param success: same as for check_attribute().
    :param fail: same as for check_attribute().
    :param eval_def: same as for check_attribute().
    """
    def short_strand(ftr):
        # Orientation of a feature in "fr_short" format ("f" or "r").
        return basic.reformat_strand(ftr.orientation, format="fr_short")

    # Collect the features to evaluate and remember which types were used.
    features = []
    ftr_types = set()
    if use_cds:
        ftr_types.add("cds")
        features.extend(self.cds_features)
    if use_trna:
        ftr_types.add("trna")
        features.extend(self.trna_features)
    if use_tmrna:
        ftr_types.add("tmrna")
        features.extend(self.tmrna_features)
    if other is not None:
        ftr_types.add("other")
        features.extend(other)

    # Optionally split the features into forward and reverse groups.
    # Anything that is not forward is placed in the reverse group.
    if strand:
        s_info = "were"
        forward_features = []
        reverse_features = []
        for feature in features:
            if short_strand(feature) == "f":
                forward_features.append(feature)
            else:
                reverse_features.append(feature)
        feature_groups = [forward_features, reverse_features]
    else:
        s_info = "were not"
        feature_groups = [features]

    # Sort the type names so the message does not depend on set order.
    ft_string = basic.join_strings(sorted(ftr_types), delimiter=", ")
    result = (
        f"The following types of features were evaluated: {ft_string}. "
        f"Features {s_info} separately grouped "
        "by orientation for evaluation. ")

    msgs = ["There are one or more errors with the feature coordinates."]
    for group in feature_groups:
        ordered = sorted(group, key=attrgetter("start", "stop"))

        # Compare each feature with its successor in sorted order.
        for current, following in zip(ordered, ordered[1:]):
            ftrs = (f"Feature1 ID: {current.id}, "
                    f"start coordinate: {current.start}, "
                    f"stop coordinate: {current.stop}, "
                    f"orientation: {current.orientation}. "
                    f"Feature2 ID: {following.id}, "
                    f"start coordinate: {following.start}, "
                    f"stop coordinate: {following.stop}, "
                    f"orientation: {following.orientation}. ")

            if (current.start == following.start
                    and current.stop == following.stop):
                msgs.append(ftrs)
                msgs.append("Feature1 and Feature2 contain identical "
                            "start and stop coordinates.")

            # To identify nested features, the following tests
            # avoid false errors due to genes that may wrap around the
            # genome.
            elif (current.start < following.start
                    and current.start < following.stop
                    and current.stop > following.start
                    and current.stop > following.stop):
                msgs.append(ftrs)
                msgs.append("Feature2 is nested within Feature1.")

            # Two reverse features sharing a start coordinate are
            # reported as sharing a stop coordinate.
            elif (current.start == following.start
                    and short_strand(current) == "r"
                    and short_strand(following) == "r"):
                msgs.append(ftrs)
                msgs.append("Feature1 and Feature2 contain "
                            "identical stop coordinates.")

            elif (current.stop == following.stop
                    and short_strand(current) == "f"
                    and short_strand(following) == "f"):
                msgs.append(ftrs)
                msgs.append("Feature1 and Feature2 contain "
                            "identical stop coordinates.")

    # The first entry of msgs is only the header, so more than one
    # entry means at least one conflict was found.
    if len(msgs) > 1:
        result = result + " ".join(msgs)
        status = fail
    else:
        result = result + "The feature coordinates are correct."
        status = success

    definition = "Check if there are any feature coordinate conflicts."
    definition = basic.join_strings([definition, eval_def])
    self.set_eval(eval_id, definition, result, status)