예제 #1
0
def find_restriction_sites_in_seq(seq, enzyme):
    """Return every occurrence of ``enzyme``'s recognition site in ``seq``.

    Args:
        seq: The sequence to search.  Anything that is not already a ``Seq``
            is wrapped with ``Seq(seq)`` first.
        enzyme: A restriction enzyme object exposing ``compsite``, ``size``
            and ``site``; ``str(enzyme)`` is used as the enzyme name.

    Returns:
        A list of dicts, one per match, with the keys ``'enzyme'`` (the
        enzyme name), ``'site'`` (``enzyme.site``), ``'interval'`` (a
        ``(start, end)`` tuple, shifted down by one from the positions
        reported by ``FormattedSeq.finditer``) and ``'strand'`` (``1`` when
        the match group named after the enzyme matched, ``-1`` otherwise).
    """
    # Search strategy similar to Bio.Restriction.RestrictionBatch.search():
    # wrap the sequence in a FormattedSeq object and use its finditer().
    forward_group_name = str(enzyme)
    if not isinstance(seq, Seq):
        seq = Seq(seq)
    fseq = FormattedSeq(seq)

    occurrences = []
    # The search is run exactly once; its result is consumed directly.
    for start, match_group in fseq.finditer(enzyme.compsite, enzyme.size):
        # The group named after the enzyme is only set for a forward match.
        strand = 1 if match_group(forward_group_name) else -1

        occurrences.append({
            'enzyme': forward_group_name,
            'site': enzyme.site,
            # Both ends are shifted by -1 to make the interval pythonic.
            'interval': (start - 1, start + enzyme.size - 1),
            'strand': strand,
        })

    return occurrences
예제 #2
0
 def test_sequence_object(self):
     """Check which sequence types FormattedSeq and EcoRI.search accept.

     A plain ``str`` must raise ``TypeError`` in both places, while
     ``Seq``, ``MutableSeq`` and (for the constructor) an existing
     ``FormattedSeq`` must be accepted without error.
     """
     # Constructor: a bare string is refused.
     with self.assertRaises(TypeError):
         FormattedSeq('GATC')
     FormattedSeq(Seq('TAGC'))
     from_mutable = FormattedSeq(MutableSeq('AGTC'))
     # Wrapping an already formatted sequence again must also work.
     FormattedSeq(from_mutable)

     # Enzyme search: same rule for the raw string ...
     with self.assertRaises(TypeError):
         EcoRI.search('GATC')
     # ... and the sequence objects go through.
     EcoRI.search(Seq('ATGC'))
     EcoRI.search(MutableSeq('TCAG'))
예제 #3
0
    def test_circular_sequences(self):
        """Cut circular sequences and search the resulting fragments."""
        # Each enzyme is paired with the fixture attribute holding a
        # sequence for it; all three are expected to behave the same way.
        fixtures = (
            (EcoRI, 'ecosite_seq'),
            (KpnI, 'kpnsite_seq'),
            (SmaI, 'smasite_seq'),
        )
        for enzyme, attr_name in fixtures:
            fragments = enzyme.catalyse(getattr(self, attr_name),
                                        linear=False)
            # The circular digest is expected to yield a single fragment ...
            self.assertEqual(len(fragments), 1)
            # ... in which the site is reported at position 1.
            hits = enzyme.search(fragments[0], linear=False)
            self.assertEqual(hits, [1])

        # Direct searches on literal circular sequences.
        ear_hits = EarI.search(FormattedSeq(Seq('CTCTTCAAAAA')), linear=False)
        self.assertEqual(ear_hits, [8])
        sna_hits = SnaI.search(FormattedSeq(Seq('GTATACAAAAA')), linear=False)
        self.assertEqual(sna_hits, [1])
예제 #4
0
 def test_formatted_seq(self):
     """Check string form, equality and topology changes of FormattedSeq."""
     gatc = FormattedSeq(Seq('GATC'))
     tagc = FormattedSeq(Seq('TAGC'))

     # String form (this variant expects an Alphabet() in the Seq repr).
     self.assertEqual(
         str(gatc),
         "FormattedSeq(Seq('GATC', Alphabet()), linear=True)")

     # Equality: different letters differ, a bare Seq is never equal,
     # and two separately built objects with the same letters are equal.
     self.assertFalse(gatc == tagc)
     self.assertFalse(tagc == Seq('TAGC'))
     same_letters = FormattedSeq(Seq('ATGC')) == FormattedSeq(Seq('ATGC'))
     self.assertTrue(same_letters)

     # In-place topology switches.
     seq = FormattedSeq(Seq('T'))
     self.assertTrue(seq.is_linear())
     seq.circularise()
     self.assertFalse(seq.is_linear())
     seq.linearise()

     # Conversions that return a new object.
     as_circular = seq.to_circular()
     self.assertFalse(as_circular.is_linear())
     back_to_linear = as_circular.to_linear()
     self.assertTrue(back_to_linear.is_linear())
예제 #5
0
 def test_formatted_seq(self):
     """Check string form, equality and topology changes of FormattedSeq."""
     # String form of a freshly built, linear object.
     expected_text = "FormattedSeq(Seq('GATC'), linear=True)"
     self.assertEqual(str(FormattedSeq(Seq('GATC'))), expected_text)

     # Equality comparisons go through ``==`` on purpose.
     differs = FormattedSeq(Seq('GATC')) == FormattedSeq(Seq('TAGC'))
     self.assertFalse(differs)
     versus_plain_seq = FormattedSeq(Seq('TAGC')) == Seq('TAGC')
     self.assertFalse(versus_plain_seq)
     matches = FormattedSeq(Seq('ATGC')) == FormattedSeq(Seq('ATGC'))
     self.assertTrue(matches)

     # Flip the topology in place, then back again.
     subject = FormattedSeq(Seq('T'))
     self.assertTrue(subject.is_linear())
     subject.circularise()
     self.assertFalse(subject.is_linear())
     subject.linearise()

     # The to_* methods hand back converted objects.
     circular_copy = subject.to_circular()
     self.assertFalse(circular_copy.is_linear())
     linear_copy = circular_copy.to_linear()
     self.assertTrue(linear_copy.is_linear())
예제 #6
0
 def test_non_iupac_letters(self):
     """Building a FormattedSeq from non-IUPAC letters raises TypeError."""
     # The result is never used; only the raised exception matters.
     with self.assertRaises(TypeError):
         FormattedSeq(Seq('GATCZ'))
예제 #7
0
def findPossOH_byPrimerLength(GGfrags, maxPrimerLength, annealingLength,
                              gBlockMaxSize, enzyme):
    """List candidate 4-base overhangs for each junction between fragments.

    For every pair of consecutive fragments a window around the junction is
    computed ("wiggle room"), limited by primer length, piece length and,
    for fragments whose ``forced_method`` is "gBlocks" or "Oligo Assembly",
    by a maximum construct size.  Overhang candidates are then read from
    that window.

    Args:
        GGfrags: Sequence of fragment objects providing ``fiveprimeOH``,
            ``fiveprimeExt``, ``seq``, ``threeprimeExt``, ``threeprimeOH``
            and ``forced_method``.
        maxPrimerLength: Upper bound used in the primer-length constraints.
        annealingLength: Length subtracted from every constraint.
        gBlockMaxSize: Size limit applied when a fragment's forced method
            is "gBlocks".
        enzyme: Name of an attribute of ``Restriction`` (looked up with
            ``getattr``) whose ``search`` and ``site`` are used.

    Returns:
        A list with one entry per junction (fragment ``i - 1`` / ``i`` for
        ``i >= 1``).  Each entry is a list of ``(overhang_seq, offset)``
        tuples: a single tuple when the enzyme is found inside the window,
        otherwise all window positions sorted by ``abs(offset)``.  Returns
        ``False`` as soon as a junction has no candidates.
    """
    # Flatten each fragment into [left flank, core sequence, right flank].
    segments = []
    forced_methods = []
    for each in GGfrags:
        seg = [
            each.fiveprimeOH + each.fiveprimeExt, each.seq,
            each.threeprimeExt + each.threeprimeOH
        ]
        segments.append(seg)
        forced_methods.append(each.forced_method)
    # wiggleRoom[i] is (negative left offset, right offset) around the
    # junction between segments[i - 1] and segments[i].
    wiggleRoom = []
    for i in range(len(segments)):
        leftWiggle = 0
        rightWiggle = 0
        # If junction is with vector, skip it because it is fixed
        # (index 0 therefore keeps a wiggle room of (0, 0)).
        if i == 0:
            pass
        else:
            leftSeg = segments[i - 1]
            rightSeg = segments[i]
            # constraints on leftWiggle from primer length
            leftWiggle_primer = maxPrimerLength - len(
                rightSeg[0]) - annealingLength
            # constraints on leftWiggle from piece length; the right wiggle
            # already granted to the previous junction is subtracted
            leftWiggle_pieceLen = len(leftSeg[1] + leftSeg[2]) - wiggleRoom[
                i - 1][1] - annealingLength
            # NOTE(review): 22 and 200 below are undocumented constants —
            # presumably a gBlock overhead and an oligo-assembly size limit;
            # confirm.
            if forced_methods[i] == "gBlocks":
                leftWiggle_gBlock = gBlockMaxSize - len(rightSeg[0] +
                                                        rightSeg[1] +
                                                        rightSeg[2]) - 22
                leftWiggle_pieceLen = min(leftWiggle_pieceLen,
                                          leftWiggle_gBlock)
            elif forced_methods[i] == "Oligo Assembly":
                leftWiggle_oligo = 200 - len(rightSeg[0] + rightSeg[1] +
                                             rightSeg[2])
                leftWiggle_pieceLen = min(leftWiggle_pieceLen,
                                          leftWiggle_oligo)
            # assign the minimum constraints to leftWiggle
            leftWiggle = min(leftWiggle_primer, leftWiggle_pieceLen)
            # constraints on rightWiggle from primer length
            # NOTE(review): the extra 4 presumably accounts for the 4-base
            # overhang read at combined[j:j + 4] further down — confirm.
            rightWiggle_primer = maxPrimerLength - len(
                leftSeg[2]) - annealingLength - 4
            # constraints on rightWiggle from piece length
            # Don't have to subtract 4 here, but it will make primer design easier
            rightWiggle_pieceLen = len(rightSeg[0] +
                                       rightSeg[1]) - annealingLength - 4
            # Size limits for the left fragment; wiggleRoom[i - 1][0] is
            # zero or the negated left wiggle of the previous junction.
            if forced_methods[i - 1] == "gBlocks":
                rightWiggle_gBlock = gBlockMaxSize - len(
                    leftSeg[0] + leftSeg[1] +
                    leftSeg[2]) - 22 - 4 + wiggleRoom[i - 1][0]
                rightWiggle_pieceLen = min(rightWiggle_pieceLen,
                                           rightWiggle_gBlock)
            elif forced_methods[i - 1] == "Oligo Assembly":
                rightWiggle_oligo = 200 - len(leftSeg[0] + leftSeg[1] +
                                              leftSeg[2]) - 4 + wiggleRoom[
                                                  i - 1][0]
                rightWiggle_pieceLen = min(rightWiggle_pieceLen,
                                           rightWiggle_oligo)
            # assign the minimum constraints to rightWiggle
            rightWiggle = min(rightWiggle_primer, rightWiggle_pieceLen)
        # The left wiggle is stored negated so it can be added directly to
        # the junction index when slicing below.
        wiggleRoom.append((-leftWiggle, rightWiggle))

    poss_ohs = []
    for i in range(1, len(segments)):
        leftSeg = segments[i - 1]
        rightSeg = segments[i]
        leftPiece = leftSeg[0] + leftSeg[1] + leftSeg[2]
        rightPiece = rightSeg[0] + rightSeg[1] + rightSeg[2]
        combined = leftPiece.upper() + rightPiece.upper()
        oh_possibilities = []
        # Window of the joined sequence that the junction may move within;
        # len(leftPiece) is the index of the nominal junction.
        poss_string = combined[len(leftPiece) +
                               wiggleRoom[i][0]:len(leftPiece) +
                               wiggleRoom[i][1] + 1]
        enz = getattr(Restriction, enzyme)
        # If the enzyme is found inside the window, a single overhang is
        # derived from the site position instead of listing every position.
        if len(enz.search(FormattedSeq(Seq(poss_string)))) > 0:
            oh_index = None
            oh_seq = None
            # NOTE(review): the offsets 7/11 (forward) and 5/1 (reverse)
            # presumably encode where the enzyme cuts relative to its
            # recognition site — confirm for every supported enzyme.
            if poss_string.find(enz.site) > -1:
                site_location = poss_string.find(enz.site)
                oh_index = site_location + wiggleRoom[i][0] + 7
                oh_seq = poss_string[site_location + 7:site_location + 11]
            else:
                # NOTE(review): if the literal reverse-complement site is
                # not found either, find() returns -1 and the arithmetic
                # below still runs; negative slice bounds are likewise
                # possible when site_location < 5 — verify.
                site_location = poss_string.find(
                    str(Seq(enz.site).reverse_complement()))
                oh_index = site_location + wiggleRoom[i][0] - 5
                oh_seq = poss_string[site_location - 5:site_location - 1]
            poss_ohs.append([(oh_seq, oh_index)])
        else:
            # Every position in the window yields one candidate: the four
            # bases starting there and its offset from the nominal junction.
            for j in range(
                    len(leftPiece) + wiggleRoom[i][0],
                    len(leftPiece) + wiggleRoom[i][1] + 1):
                oh_possibilities.append(
                    (combined[j:j + 4], j - len(leftPiece)))
            # If no overhang options were found for a junction, return false
            if len(oh_possibilities) == 0:
                return False
            else:
                # Candidates closest to the nominal junction come first.
                oh_sorted = sorted(oh_possibilities,
                                   key=lambda overhang: abs(overhang[1]))
                poss_ohs.append(oh_sorted)
    return poss_ohs
예제 #8
0
    def checkInput_afterOptimization(self):
        """Validate the part sequence and report whether it is error-free.

        Appends messages to ``self.errors`` for disallowed BbsI/BsmBI
        sites and to ``self.warnings`` for connector-site count, missing
        start codon, frame and stop-codon findings.

        Returns:
            ``True`` when ``self.errors`` is empty afterwards, else ``False``.
        """
        # Part types for which a BbsI (resp. BsmBI) site is an error.
        bbsi_types = ('1', '2a', '2b', '3a', '3b', '3c', '3d', '3e', '4a',
                      '4b', '5', '6', '7')
        bsmbi_types = ('2a', '2b', '3a', '3b', '3c', '3d', '3e', '4a', '4b',
                       '6', '7')
        bbsi_error = (
            "Your part contains a BbsI site which must be removed prior to assembly."
        )
        bsmbi_error = (
            "Your part contains a BsmBI site which must be removed prior to assembly."
        )

        f_seq = FormattedSeq(Seq(self.partSeq), True)
        left = self.leftPartType
        right = self.rightPartType

        # Check to make sure BbsI/BsmBI sites aren't present.  The searches
        # stay inside the conditions so they run lazily, in this order.
        if BbsI.search(f_seq) and BsmBI.search(f_seq):
            # Parts with both sites are currently let through.  Enable the
            # line below to reject them instead; be careful, though, such
            # assemblies may be problematic.
            #self.errors.append("Your part contains both BbsI and BsmBI sites and cannot be assembled using golden gate.")
            pass
        elif (left in bbsi_types or right in bbsi_types) \
                and BbsI.search(f_seq):
            self.errors.append(bbsi_error)
        elif (left in bsmbi_types or right in bsmbi_types) \
                and BsmBI.search(f_seq):
            self.errors.append(bsmbi_error)

        # Check to make sure connector parts have exactly one BsmBI site,
        # counting the literal site on both strands.
        upper_seq = self.partSeq.upper()
        bsmBIFor = upper_seq.count(BsmBI.site)
        bsmBIRev = upper_seq.count(str(Seq(BsmBI.site).reverse_complement()))
        connector_warnings = (
            ("1",
             "Your type 1 part should have exactly 1 BsmBI site. Consider modifying for multigene assembly."
             ),
            ("5",
             "Your type 5 part should have exactly 1 BsmBI site. Consider modifying for multigene assembly."
             ),
        )
        for part_type, message in connector_warnings:
            if part_type in (str(left), str(right)):
                if bsmBIFor + bsmBIRev != 1:
                    self.warnings.append(message)

        # Warn when a part whose left type is 3a does not begin with ATG.
        if left in ("3a",):
            if not str(upper_seq).startswith("ATG"):
                self.warnings.append("Your part is missing a start codon.")

        # For these right-hand types, warn about a broken reading frame
        # and about stop codons in the translation.
        if str(right) in ('3a', '3b', '3c', '3d', '3e', '4a'):
            if len(self.partSeq) % 3 != 0:
                self.warnings.append(
                    "Your part appears to be out of frame (length is not a multiple of 3). If this is a coding sequence, check to make sure it is correct."
                )
            if Seq(self.partSeq).translate().find("*") >= 0:
                self.warnings.append(
                    "Your part has a stop codon. If it is not removed, the part cannot be used for making N-terminal fusions."
                )

        return len(self.errors) == 0
예제 #9
0
def silentMutate(seq, leftIndex, rightIndex, enzyme_list=None):
    """Change one base in ``seq[leftIndex:rightIndex]`` without altering the translation.

    Synonymous codons that differ from the current codon at a single
    position (as reported by ``singleBPmutation``) are collected for the
    codons overlapping the range.  Candidates are tried starting from the
    one with the highest ``codon[1]`` value, and the first one that does
    not add a site for any enzyme in ``enzyme_list`` is used.  If every
    candidate adds a site, the last one tried is used anyway.

    Args:
        seq: Nucleotide sequence as a string; case is meaningful because
            the mutated base is written in the opposite case of the base
            it replaces.
        leftIndex: Inclusive start of the range that may be mutated.
        rightIndex: Exclusive end of the range that may be mutated.
        enzyme_list: Optional iterable of enzyme names, each an attribute
            of ``Restriction``.  ``None`` (the default) checks no enzymes.

    Returns:
        The mutated sequence string.

    Raises:
        Exception: If no candidate codon exists, or if the translation of
            the result differs from that of ``seq``.
    """
    # None replaces the former mutable ``[]`` default.
    if enzyme_list is None:
        enzyme_list = []

    firstCodonIndex = leftIndex - (leftIndex % 3)
    # NOTE(review): adding ``rightIndex % 3`` to a codon count looks
    # unusual (it may over-count); kept as-is because short trailing
    # slices are filtered out by the length check below — confirm intent.
    numCodons = int(rightIndex / 3) - int(leftIndex / 3) + rightIndex % 3
    possCodons = []
    # i holds the codon number
    for i in range(numCodons):
        codonStart = firstCodonIndex + i * 3
        currCodon = seq[codonStart:codonStart + 3]
        if len(currCodon) != 3:
            continue
        for codon in HsCodonUsage[GeneticCode[currCodon.upper()]]:
            # p holds the position within the codon of the mutated base
            # (should always be 2 as written currently).  A falsy p,
            # including position 0, is skipped.
            p = singleBPmutation(codon[0], currCodon)
            if p and leftIndex <= p + codonStart < rightIndex:
                possCodons.append([codon[0], codon[1], i, p])
    # Ascending sort + pop() from the end tries the highest codon[1] first
    # (presumably a usage frequency — verify against HsCodonUsage).
    possCodons.sort(key=lambda x: x[1])
    if len(possCodons) < 1:
        raise Exception("Couldn't find a base to mutate silently.")

    # Resolve the enzymes and count their sites in the original sequence
    # once, instead of repeating both for every candidate.
    enzymes = [getattr(Restriction, enzyme_name)
               for enzyme_name in enzyme_list]
    baselineCounts = []
    if enzymes:
        orig = FormattedSeq(Seq(seq))
        baselineCounts = [(enzyme, len(enzyme.search(orig)))
                          for enzyme in enzymes]

    while True:
        newCodon = possCodons.pop()
        mutationIndex = firstCodonIndex + newCodon[2] * 3 + newCodon[3]

        # The replacement base gets the opposite case of the base it
        # replaces.
        oldBase = seq[mutationIndex:mutationIndex + 1]
        replacement = newCodon[0][newCodon[3]]
        if oldBase.islower():
            newBase = replacement.upper()
        else:
            newBase = replacement.lower()

        # A flank is case-swapped as a whole when its base adjacent to the
        # mutation has the same case as the new base (presumably so the
        # mutated base stays visually distinct — confirm).
        leftBase = seq[mutationIndex - 1:mutationIndex]
        rightBase = seq[mutationIndex + 1:mutationIndex + 2]
        front = seq[:mutationIndex]
        if (leftBase.islower()
                and newBase.islower()) or (leftBase.isupper()
                                           and newBase.isupper()):
            front = front.swapcase()
        back = seq[mutationIndex + 1:]
        if (rightBase.islower()
                and newBase.islower()) or (rightBase.isupper()
                                           and newBase.isupper()):
            back = back.swapcase()
        newSeq = front + newBase + back

        introducedNewSite = False
        if baselineCounts:
            new = FormattedSeq(Seq(newSeq))
            introducedNewSite = any(
                len(enzyme.search(new)) > origCount
                for enzyme, origCount in baselineCounts)
        # Accept a clean candidate, or give up and keep the last one when
        # nothing is left to try.
        if not introducedNewSite or len(possCodons) < 1:
            break

    s1 = Seq(seq)
    s2 = Seq(newSeq)
    if str(s1.translate()) != str(s2.translate()):
        raise Exception(
            "Error: The attempted silent mutation wasn't silent!!!")
    return newSeq