def sequence_variant(self, sequence, mutation, strand="+"):
    """
    Build the list of sequences that ``mutation`` yields for ``sequence``.

    Two paths exist:

    * ``mutation["observed"] == "lengthTooLong"``: the result is a plain
      duplicate of ``sequence`` followed by a second duplicate, named after
      ``mutation["name"]``, in which ``chromStart``..``chromEnd`` is
      replaced by a run of ``"?"`` of length ``chromEnd - chromStart + 1``.
    * anything else: ``mutation["class"]`` is lower-cased, stripped of
      hyphens, and used to look up ``_<class>_sequence_variant`` on this
      object's class; ``_unknown_variant`` is used when no such attribute
      exists.  The handler is called with the strand-normalised sequence
      and the mutation.

    :param sequence: object exposing ``strand`` and ``reverse_complement()``.
    :param mutation: mapping read for ``observed`` plus either ``name`` /
        ``chromStart`` / ``chromEnd`` or ``class``, depending on the path.
    :param strand: when ``"-"``, every result is passed through the
        module-level ``reverse_complement`` before being returned.
    :returns: a list of sequences.

    NOTE(review): the ``lengthTooLong`` path duplicates the incoming
    ``sequence`` rather than the strand-normalised copy that the handler
    path receives -- confirm this asymmetry is intended.
    TODO: the unmodified sequence probably should not be part of the result.
    """
    owner = self.__class__

    # Normalise to the "+" strand first; this runs on every call, even when
    # the normalised copy ends up unused by the lengthTooLong path.
    plus_strand = sequence.reverse_complement() if sequence.strand == "-" else sequence

    if mutation["observed"] != "lengthTooLong":
        kind = mutation["class"].lower().replace("-", "")
        # The fallback is resolved eagerly, exactly like a getattr() default.
        fallback = owner._unknown_variant
        handler = getattr(owner, "_%s_sequence_variant" % kind, fallback)
        variants = handler(plus_strand, mutation)
    else:
        # The observed alleles could not be read, so the affected span is
        # masked with "?" instead of raising (an UnknownMutationError raise
        # was previously left here commented out).
        untouched = owner.duplicate(sequence)
        renamed = owner.duplicate(sequence, name=mutation["name"])
        first, last = mutation["chromStart"], mutation["chromEnd"]
        masked = TransformedGenomeSequence.replace(
            renamed,
            first,
            last,
            "?" * (last - first + 1),
        )
        variants = [untouched, masked]

    if strand != "-":
        return variants
    return [reverse_complement(variant) for variant in variants]
:param transformer: The MutationTransformer that takes in the original sequence and applies the SNP mutations. Mutation structures will vary with each transformer. """ xf = transformer e = None try: sequences = xf.sequence_variants(original, mutations, combination_width, strand) except ReturnWithCaveats, e: rc = e sequences = e.return_value sequences = [seq for seq in sequences if seq.positive_strand_sequence != original.positive_strand_sequence] if original.strand == strand: sequences.insert(0, TransformedGenomeSequence.from_sequence(original)) else: sequences.insert(0, TransformedGenomeSequence.from_sequence(original.reverse_complement())) if e: raise ReturnWithCaveats(e.explanations, sequences) else: return sequences def find_in_sequence(fragment, sequence): regex = base_regexp_expand(fragment, overlap=True) return __find_in_sequence_regex(regex, sequence, overlapped=True) def __find_in_sequence_regex(regex, sequence, overlapped=True): seq_matches = []
def duplicate(cls, sequence, name=None):
    """
    Return a copy of ``sequence`` built by
    ``TransformedGenomeSequence.from_sequence``.

    :param sequence: the sequence to copy.
    :param name: forwarded unchanged as the ``name`` keyword; defaults to
        ``None``.
    :returns: whatever ``TransformedGenomeSequence.from_sequence`` returns.

    ``cls`` is accepted but not consulted: the copy is always produced via
    ``TransformedGenomeSequence``, whichever class the call came through.
    """
    copied = TransformedGenomeSequence.from_sequence(sequence, name=name)
    return copied