Esempi in Python per CDSHelper.CDSlength

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: data_helpers

Classe/tipologia: CDSHelper

Metodo/funzione: CDSlength

Esempi su hotexamples.com: 4

CDSHelper.CDSlength in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per data_helpers.CDSHelper.CDSlength, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

CDSHelper(14)

sequence(7)

shuffledSeqIds(7)

length(6)

seqId(4)

CDSlength(4)

getGeneId(3)

getCalculationResult2(2)

getShuffledSeq2(2)

getShuffledSeq(2)

dropShuffledSeqs(2)

flankingRegion3UtrLength(1)

getProtId(1)

commitChanges(1)

dropRecord(1)

getShuffledSeqId(1)

getTaxId(1)

dropNativeSeq(1)

nextCDSOnOppositeStrand(1)

saveCalculationResult2(1)

crc(1)

_fetchSequence(1)

checkCalculationResultWithWindows(1)

Esempio n. 1

Mostra file

File: requeue_sequences_missing_energies_for_sliding_window.py Progetto: michaelpeeri/rnafold-rts-public

        # Exclude some sequences from the calculation
        # ------------------------------------------------------------------------------------------

        # Skip sequences with partial CDS annotations
        #if(r.exists("CDS:taxid:%d:protid:%s:partial" % (taxIdForProcessing, protId))):
        #    skipped += 1
        #    continue

        #if( not r.exists(nativeCdsSeqIdKey % (taxIdForProcessing, protId)) ):
        #    skipped +=1
        #    continue

        cds = CDSHelper(taxIdForProcessing, protId)

        seqLength = cds.length()
        stopCodonPos = cds.CDSlength()

        if seqLength is None:
            print(
                "Warning: Could not find CDS length entry for taxid=%d, protid=%s"
                % (taxIdForProcessing, protId))
            skipped += 1
            stats['skipped-cds-length-missing'] += 1
            continue

        # Skip sequences with length <40nt (window width)
        if (seqLength < windowWidth + 1):
            print("short seq")
            stats['skipped-short-seq'] += 1
            skipped += 1
            continue

Esempio n. 2

Mostra file

File: calculate_sliding_window_series.py Progetto: michaelpeeri/rnafold-rts-public

    def calculateMissingWindowsForSequence(self, taxId, protId, seqIds, requestedShuffleIds, firstWindow, lastWindowStart, windowStep, reference="begin", shuffleType=db.Sources.ShuffleCDSv2_python, debug=False):
        """Compute and store the sliding-window profile values for one protein record.

        For the protein identified by (taxId, protId), determine which window
        start positions are requested, load the existing results for the
        requested shuffle-ids, and (if any requested window is missing for any
        shuffle-id) calculate the series selected by self._seriesSourceNumber
        for every requested window of every returned record. Each updated
        record is written to self._logfile as JSON and, unless
        self._debugDoneWriteResults is set, saved through the CDSHelper.

        Args:
            taxId: Numeric taxon id (formatted with %d).
            protId: Protein id (formatted with %s).
            seqIds: Sequence ids, parallel to requestedShuffleIds (same length,
                non-empty).
            requestedShuffleIds: Shuffle ids to process; must support .index().
                A negative id selects the native sequence (cds.sequence()),
                other ids select a shuffled sequence.
            firstWindow: Not used by this method.
            lastWindowStart: For reference="begin", the last window start to
                include; for "end", the smallest window start to include; for
                "stop3utr", (lastWindowStart//2)*windowStep is the maximal
                distance of a window start from the stop codon position.
            windowStep: Distance between consecutive window starts.
            reference: One of "begin", "end" or "stop3utr".
            shuffleType: Passed through to the CDSHelper calls and stored in
                new result records.
            debug: When true, pretty-print each profile after it is computed.

        Returns:
            None. Returns early (after logging a warning) when no requested
            shuffle-id has missing windows.

        Raises:
            Exception: For an unsupported reference, a missing species
                temperature, a sequence shorter than the window, an empty
                window list (reference="begin"), an empty sequence, or a
                sequence whose length differs from cds.length().
            AssertionError: When one of the internal sanity checks fails.
        """

        timerForPreFolding.start()
        logging.warning("Parameters: %d %s %s %s %d %d %s %d" % (taxId, protId, seqIds, requestedShuffleIds, lastWindowStart, windowStep, reference, shuffleType))
        f = self._logfile

        assert(len(seqIds)>0)
        assert(len(seqIds)==len(requestedShuffleIds))

        # ------------------------------------------------------------------------
        # Obtain species-dependent properties needed for some calculations
        # ----------------
        # Optimal Temp (only needed for the native-temperature folding series)
        optimalSpeciesGrowthTemperature = None
        if( self._seriesSourceNumber == db.Sources.RNAfoldEnergy_SlidingWindow40_v2_native_temp ):
            (numericalProp, _) = getSpeciesTemperatureInfo(taxId)
            optimalSpeciesGrowthTemperature = numericalProp[0]

            if optimalSpeciesGrowthTemperature is None:
                raise Exception("No temperature value for taxid={}, can't calculate native-temperature folding profile...".format(taxId))
            else:
                optimalSpeciesGrowthTemperature = float(optimalSpeciesGrowthTemperature)
                assert(optimalSpeciesGrowthTemperature >= -30.0 and optimalSpeciesGrowthTemperature <= 150.0)
        # ----------------
        # Genomic translation table (only needed for the stop-codon content series)
        genomicTranslationTable = None
        if( self._seriesSourceNumber in (db.Sources.StopCodon_content_SlidingWindow30, db.Sources.StopCodon_content_SlidingWindow40, db.Sources.StopCodon_content_SlidingWindow50 )):
            genomicTranslationTable = getSpeciesTranslationTable(taxId)
            assert(genomicTranslationTable>0 and genomicTranslationTable<=31)

        if( reference != "begin" and reference != "end" and reference != "stop3utr"):
            timerForPreFolding.stop()
            e = "Specificed profile reference '%s' is not supported!" % reference
            logging.error(e)
            raise Exception(e)

        # We will process all listed shuffle-ids for the following protein record
        if( reference == "begin" or reference == "end" ):
            regionOfInterest = RegionsOfInterset.CDSonly
        elif reference == "stop3utr":
            regionOfInterest = RegionsOfInterset.CDSand3UTR
        else:
            assert(False)  # unreachable: reference was validated above

        cds = CDSHelper( taxId, protId, regionOfInterest=regionOfInterest )

        if( cds.length() < self._windowWidth ):
            # The item is identified by taxId/protId; this method has no queue-item name in scope.
            e = "Refusing to process item taxid=%d, protid=%s because the sequence length (%d nt) is less than the window size (%d nt)\n" % (taxId, protId, cds.length(), self._windowWidth)
            f.write(e)
            logging.error(e)
            timerForPreFolding.stop()
            raise Exception(e)

        # Create a list of the windows we need to calculate for this CDS
        if reference == "begin":
            requestedWindowStarts = frozenset(range(0, min(lastWindowStart+1, cds.length()-self._windowWidth-1), windowStep))
            if( len(requestedWindowStarts) == 0):
                e = "No windows exist for calculation taxid=%d, protId=%s, CDS-length=%d, lastWindowStart=%d, windowStep=%d, windowWidth=%d - Skipping...\n" % (taxId, protId, cds.length(), lastWindowStart, windowStep, self._windowWidth)
                f.write(e)
                logging.error(e)
                timerForPreFolding.stop()
                raise Exception(e)

        elif reference == "end":
            # Window starts are aligned so that the last one is the last possible start;
            # lastWindowStart acts as a lower bound here.
            lastPossibleWindowStart = cds.length() - self._windowWidth
            requestedWindowStarts = frozenset([x for x in range(lastPossibleWindowStart % windowStep, lastPossibleWindowStart+1, windowStep) if x>=lastWindowStart])

        elif reference == "stop3utr":
            seqLength = cds.length()
            stopCodonPos = cds.CDSlength()

            # Select the candidate window starts themselves (not their ordinal
            # indices), so the selection stays correct when windowStep != 1.
            maxDistanceFromStop = (lastWindowStart//2)*windowStep
            requestedWindowStarts = frozenset( pos for pos in range(0, seqLength - self._windowWidth, windowStep) if abs(pos-stopCodonPos) < maxDistanceFromStop )

        else:
            assert(False)  # unreachable: reference was validated above

        # First, read available results (for all shuffle-ids)
        # The result is keyed by shuffle-id; requested items that have no results yet are represented by None.
        logging.info("DEBUG: requestedShuffleIds (%d items): %s\n" % (len(requestedShuffleIds), requestedShuffleIds))
        existingResults = cds.getCalculationResult2( self._seriesSourceNumber, requestedShuffleIds, True, shuffleType=shuffleType )
        assert(len(existingResults) == len(requestedShuffleIds))
        logging.info("requestedShuffleIds: %s" % requestedShuffleIds)
        logging.info("existingResults.keys(): %s" % list(existingResults.keys()))
        assert(frozenset(requestedShuffleIds)==frozenset(existingResults.keys()))
        logging.info("DEBUG: existingResults (%d items): %s\n" % (len(existingResults), existingResults))

        # Check for which of the requested shuffle-ids there are values missing
        shuffleIdsToProcess = {}
        for shuffleId, r in existingResults.items():
            if r is None:
                # There are no existing results for shuffled-id n. If it was requested, it should be calculated now (including all windows)
                if shuffleId in requestedShuffleIds:
                    shuffleIdsToProcess[shuffleId] = list(requestedWindowStarts)

                # NOTE(review): this stops the timer once per missing record, and it is stopped again below - confirm the timer tolerates repeated stop() calls
                timerForPreFolding.stop()

                # ------------------------------------------------------------------------------------
                continue   # TODO - verify this line; should we abort this sequence by throwing????
                # ------------------------------------------------------------------------------------

            logging.info("/// shuffleId r = %d %s" % (shuffleId, r))
            logging.info("r[MFE-profile] %s" % r["MFE-profile"])

            # Check the existing results for this shuffle
            alreadyProcessedWindowStarts = frozenset( [i for i,x in enumerate(r["MFE-profile"] ) if x is not None] ) # Get the indices (=window starts) of all non-None values
            missingWindows = requestedWindowStarts - alreadyProcessedWindowStarts # Are there any requested windows that are not already computed?
            if( missingWindows ):
                shuffleIdsToProcess[shuffleId] = missingWindows

        if( not shuffleIdsToProcess):
            e = "All requested shuffle-ids in (taxId: %d, protId: %s, seqs: %s) seem to have already been processed. Skipping...\n" % (taxId, protId, str(list(zip(seqIds, requestedShuffleIds))) )
            logging.warning(e)
            timerForPreFolding.stop()
            return
        logging.info("DEBUG: shuffleIdsToProcess (%d items): %s\n" % (len(shuffleIdsToProcess), shuffleIdsToProcess))

        logging.info("DEBUG: Before (%d items): %s\n" % (len(existingResults), existingResults))
        # Initialize new results records
        for shuffleId in shuffleIdsToProcess:
            if existingResults[shuffleId] is None:
                logging.info(seqIds)
                logging.info(requestedShuffleIds)
                logging.info(shuffleId)
                # seqIds and requestedShuffleIds are parallel lists
                thisSeqId = seqIds[ requestedShuffleIds.index(shuffleId) ]

                existingResults[shuffleId] = { "id": "%s/%s/%d/%d" % (taxId, protId, thisSeqId, shuffleId), "seq-crc": None, "MFE-profile": [], "MeanMFE": None, "v": 2, "shuffle-type":shuffleType }
        logging.info("DEBUG: existingResults (%d items): %s\n" % (len(existingResults),existingResults) )
        timerForPreFolding.stop()

        # Load the sequences of all shuffle-ids we need to work on
        # TODO - combine loading of multiple sequences into one DB operation
        # NOTE(review): every non-empty record is processed here (not only those in shuffleIdsToProcess),
        # and all requested windows are recomputed for it - confirm this is intended
        for shuffleId, record in list(existingResults.items()):
            if record is None:
                logging.info("DEBUG: skipping empty results record for shuffleId={}".format(shuffleId))
                continue
            timerForPreFolding.start()

            seq = None
            annotatedSeqId = None
            # Get the sequence for this entry (negative shuffle-ids denote the native sequence)
            if( shuffleId < 0 ):
                seq = cds.sequence()
                annotatedSeqId = cds.seqId()
            else:
                seq = cds.getShuffledSeq(shuffleId, shuffleType)
                annotatedSeqId = cds.getShuffledSeqId(shuffleId, shuffleType)

            if( seq is None or len(seq)==0 ):
                # Collect diagnostics through the alternative access paths before failing
                seq2 = cds.getShuffledSeq2( annotatedSeqId )
                seq3 = cds._fetchSequence( annotatedSeqId )
                seq4 = cds._cache.get("%d:seq"%annotatedSeqId)
                if not seq4 is None:
                    del cds._cache["%d:seq"%annotatedSeqId]
                seq5 = cds.getShuffledSeq2( annotatedSeqId )
                e = "Got empty sequence for shuffleId=%d, seqId=%d, taxId=%d, protId=%s, numShuffled=%d, ids[%d:%d]=%s, len(seq2)=%d, len(seq3)=%d, len(seq4)=%d, len(seq5)=%d" % (shuffleId, annotatedSeqId, taxId, protId, len(cds.shuffledSeqIds()), shuffleId-2, shuffleId+2, cds.shuffledSeqIds()[shuffleId-2:shuffleId+2], len(seq2) if not seq2 is None else -1, len(seq3) if not seq3 is None else -1, len(seq4) if not seq4 is None else -1, len(seq5) if not seq5 is None else -1 )
                logging.error(e)
                timerForPreFolding.stop()
                raise Exception(e)

            # (A check that annotatedSeqId appears in seqIds is deliberately not performed:
            #  the calculation needn't include the native sequence.)

            expectedSeqLength = cds.length()
            if( not expectedSeqLength is None ):
                if( expectedSeqLength != len(seq) ):
                    e = "Warning: taxid=%d, protid=%s, seqid=%d - unexpected length %d (expected: %d)\n" % (taxId, protId, annotatedSeqId, len(seq), expectedSeqLength)
                    f.write(e)
                    logging.error(e)
                    timerForPreFolding.stop()
                    raise Exception(e)

            if( len(seq) < self._windowWidth ):
                # Sequence is shorter than required window; abort
                e = "Warning: skipping sequence because it is shorter than the requested window...\n"
                f.write(e)
                logging.error(e)
                timerForPreFolding.stop()
                raise Exception(e)

            logging.info("DEBUG: Processing item taxId=%d, protId=%s, shuffle=%d (length=%d, %d windows)...\n" % (taxId, protId, shuffleId, len(seq), len(requestedWindowStarts)))

            # TODO - Remove any old value stored in this key?

            logging.info(seq[:50])

            MFEprofile = record["MFE-profile"]

            # Make sure the profile array contains enough entries for all new windows (and possibly, if windows are non-contiguous, entries between them that we are not going to compute right now)
            # The profile is indexed by window start, so it needs max(start)+1 entries.
            requiredProfileLength = max(requestedWindowStarts) + 1
            if( len(MFEprofile) < requiredProfileLength ):
                MFEprofile.extend( [None] * (requiredProfileLength - len(MFEprofile)) )
            assert(len(MFEprofile) >= requiredProfileLength)

            stats = RunningStats()
            stats.extend([x for x in MFEprofile if x is not None])

            timerForPreFolding.stop()
            timerForFolding.start()
            for start in requestedWindowStarts:
                fragment = seq[start:(start+self._windowWidth)]
                assert(len(fragment)==self._windowWidth)

                if self._seriesSourceNumber in (db.Sources.RNAfoldEnergy_SlidingWindow30_v2, db.Sources.RNAfoldEnergy_SlidingWindow40_v2, db.Sources.RNAfoldEnergy_SlidingWindow50_v2):
                    # Calculate the RNA folding energy. This is the computation-heavy part.
                    result = RNAfold_direct(fragment)
                    assert(result <= 0.0)

                elif self._seriesSourceNumber == db.Sources.RNAfoldEnergy_SlidingWindow40_v2_native_temp:
                    # Calculate the RNA folding energy. This is the computation-heavy part.
                    result = RNAfold_direct(fragment, explicitCalculationTemperature = optimalSpeciesGrowthTemperature)
                    assert(result <= 0.0)

                elif self._seriesSourceNumber == db.Sources.GC_content_SlidingWindow40:
                    result = calcWindowGCContent( fragment )
                    assert( isnan(result) or (result >= 0.0 and result <= 1.0) )

                elif self._seriesSourceNumber == db.Sources.Purine_content_SlidingWindow40:
                    result = calcWindowPurineContent( fragment )
                    assert( isnan(result) or (result >= 0.0 and result <= 1.0) )

                elif self._seriesSourceNumber in (db.Sources.StopCodon_content_SlidingWindow30, db.Sources.StopCodon_content_SlidingWindow40, db.Sources.StopCodon_content_SlidingWindow50):
                    result = calcWindowStopCodonContent( fragment, translationTable=genomicTranslationTable, phase=start%3 )
                    assert( result >= 0.0 and result <= 1.0 )

                elif self._seriesSourceNumber == db.Sources.TEST_StepFunction_BeginReferenced:
                    # Synthetic test series: flat for the native sequence, periodic otherwise
                    if shuffleId < 0:
                        result = 0
                    else:
                        result = start%50 - 20

                elif self._seriesSourceNumber == db.Sources.TEST_StepFunction_EndReferenced:
                    # Synthetic test series, measured from the last possible window start
                    if shuffleId < 0:
                        result = 0
                    else:
                        result = (expectedSeqLength - self._windowWidth - start)%50 - 20

                else:
                    logging.error("Received unknown seriesSourceNumber {}".format(self._seriesSourceNumber))
                    assert(False)

                # Store the calculation result
                stats.push(result)
                MFEprofile[start] = result

            if debug:
                prettyPrintProfile(MFEprofile)

            timerForFolding.stop()
            timerForPostFolding.start()

            # Format
            crc = getCrc(seq)
            record["seq-crc"] = crc
            record["MFE-profile"] = [round4(x) for x in MFEprofile] # Round items down to save space (these are not exact numbers anyway)
            record["MeanMFE"] = stats.mean()

            if reference == "stop3utr":
                record["stop-codon-pos"] = cds.CDSlength()

            result = json.dumps(record)

            f.write(result)
            f.write("\n")

            if( not self._debugDoneWriteResults):
                cds.saveCalculationResult2( self._seriesSourceNumber, result, annotatedSeqId, False )

            timerForPostFolding.stop()

        timerForPostFolding.start()

        if( not self._debugDoneWriteResults):
            cds.commitChanges()

        timerForPostFolding.stop()

Esempio n. 3

Mostra file

File: compare_updates.py Progetto: michaelpeeri/rnafold-rts-public

                newValues = sum([1 for x in profile1 if not x is None])
                windowsAddedToProfiles.update( (newValues,) )


                rawRecordId = updateRecord[1]['id']
                try:
                    recordId = splitLongSequenceIdentifier(rawRecordId)
                            
                except Exception as e:
                    err[ErrorTypes.UpdateRecordFormatError] += 1
                    badUpdateRecords.add(updateRecord[2])
                    print(e)
                    continue

                cds = CDSHelper(recordId[0], recordId[1] )
                cdsLength = cds.CDSlength()

                newPositions = [x[0] for x in enumerate(profile1) if not x[1] is None]

                for pos in newPositions:
                    windowsAddedToProfiles_DistanceFromStart.update( (pos,) )
                    windowsAddedToProfiles_DistanceFromEnd.update( (cdsLength-pos,) )
                    windowsAddedToProfiles_FrameRelativeToStart.update( (pos%10,) )
                    windowsAddedToProfiles_FrameRelativeToEnd.update( ((cdsLength-pos)%10,) )
                

        if(rl()):
            print(total, err, recordsByTaxId)

        # DEBUG ONLY #### DEBUG ONLY #### DEBUG ONLY #### DEBUG ONLY #### DEBUG ONLY #### DEBUG ONLY #### DEBUG ONLY #
        #if( total > 90000):

Esempio n. 4

Mostra file

    def randomize(self, nucleotideSeq: str, protId: str) -> (int, float, str):
        """Randomize a CDS, its 3' flanking region and the downstream CDS.

        nucleotideSeq is split into three consecutive parts using metadata
        obtained from CDSHelper(self.taxId, protId):

          * the "main" CDS - the first cds.CDSlength() nucleotides;
          * the 3' flanking region - cds.flankingRegion3UtrLength()
            nucleotides following the CDS (only when that length is positive);
          * the downstream CDS - everything from position
            CDSlength + flankingRegionLength to the end.

        A negative flanking-region length means the downstream CDS overlaps
        the main CDS by that many nucleotides:

            no overlap:  |<--- CDS --->|<-- flank (>= 0) -->|<-- next CDS -->|
            overlap:     |<------------ CDS ------------>|
                                       |<- flank (< 0) ->|
                                       |<----------- next CDS ------------>|

        When self.constantOverlaps is set and an overlap exists, the
        overlapping tail of the main CDS (extended backwards to a codon
        boundary) is copied unchanged instead of being randomized.

        The CDS parts are passed to self.cdsRand.randomizeAmbiguousSequence and
        the flanking region to self.utrRand.randomizeAmbiguousSequence; each
        call is unpacked as a (permutation count, identity, sequence) triple.

        Returns:
            (product of the three permutation counts,
             length-weighted mean of the three identities,
             concatenation of the three randomized parts).

        Raises:
            Exception: if the overlap is longer than the main CDS.
            AssertionError: if one of the length/frame sanity checks fails.
        """
        helper = CDSHelper(self.taxId, protId)

        # Metadata describing the layout of nucleotideSeq
        cdsLen = helper.CDSlength()
        assert (cdsLen % 3 == 0)
        flankLen = helper.flankingRegion3UtrLength()
        nextIsOnOppositeStrand = helper.nextCDSOnOppositeStrand()

        if flankLen < 0 and -flankLen > cdsLen:
            raise Exception("Next CDS is fully overlapping...")

        # True when the overlapping tail of the main CDS must stay untouched
        keepOverlapConstant = self.constantOverlaps and flankLen < 0

        #-----------------------------------------------------------------------------
        # Main CDS
        #-----------------------------------------------------------------------------
        if keepOverlapConstant:
            # Stop at the last codon boundary before the overlap begins
            overlapStart = cdsLen + flankLen
            assert (overlapStart < cdsLen)
            randomizedEnd = overlapStart - (overlapStart % 3)
            mainSeq = nucleotideSeq[:randomizedEnd]
            assert (len(mainSeq) % 3 == 0)
        else:
            # No overlap, or the overlap is to be randomized as well
            mainSeq = nucleotideSeq[:cdsLen]
            assert (len(mainSeq) == cdsLen)

        mainPerms, mainIdentity, shuffledMain = self.cdsRand.randomizeAmbiguousSequence(mainSeq)

        if keepOverlapConstant:
            # Re-attach the part of the CDS that was excluded from randomization
            shuffledMain = shuffledMain + nucleotideSeq[randomizedEnd:cdsLen]
            assert (len(shuffledMain) % 3 == 0)

        # The randomized CDS must be exactly as long as the original one
        assert (len(shuffledMain) == cdsLen)

        #-----------------------------------------------------------------------------
        # 3' flanking region
        #-----------------------------------------------------------------------------
        if flankLen > 0:
            flankSeq = nucleotideSeq[cdsLen:cdsLen + flankLen]
            assert (len(flankSeq) == flankLen)
            flankPerms, flankIdentity, shuffledFlank = self.utrRand.randomizeAmbiguousSequence(flankSeq)
        else:
            # Nothing to randomize; use neutral values
            flankSeq = ""
            flankPerms = 1
            flankIdentity = 1.0
            shuffledFlank = ""

        #-----------------------------------------------------------------------------
        # Downstream CDS
        #-----------------------------------------------------------------------------
        # The same slice expression serves positive and negative flanking lengths
        downstreamSeq = nucleotideSeq[cdsLen + flankLen:]
        assert (len(downstreamSeq) % 3 == 0)
        if nextIsOnOppositeStrand:
            # Randomize in the downstream CDS's own reading direction
            downstreamSeq = str(Seq(downstreamSeq, generic_dna).reverse_complement())
        assert (len(downstreamSeq) % 3 == 0)

        downstreamPerms, downstreamIdentity, shuffledDownstream = self.cdsRand.randomizeAmbiguousSequence(downstreamSeq)

        if nextIsOnOppositeStrand:
            # Convert back to the orientation of the input sequence
            shuffledDownstream = str(Seq(shuffledDownstream, generic_dna).reverse_complement())
        if flankLen < 0:
            # Drop the overlapping prefix; those positions are already covered by the main CDS
            shuffledDownstream = shuffledDownstream[-flankLen:]

        #-----------------------------------------------------------------------------
        # Combine the three parts
        #-----------------------------------------------------------------------------
        mainWeight = len(mainSeq)
        flankWeight = len(flankSeq)
        downstreamWeight = len(downstreamSeq)

        weightedIdentity = ((mainIdentity * mainWeight) +
                            (flankIdentity * flankWeight) +
                            (downstreamIdentity * downstreamWeight)) / (
                                mainWeight + flankWeight + downstreamWeight)

        combinedPerms = mainPerms * flankPerms * downstreamPerms
        combinedSeq = shuffledMain + shuffledFlank + shuffledDownstream

        return (combinedPerms, weightedIdentity, combinedSeq)