Example #1
0
def nested(seq):
    """Return the first prefix window of ``seq`` that passes every primer filter.

    Candidates are prefixes of ``seq`` from 18 to 25 bases long. Once every
    length has been rejected at the current start, the first base of ``seq``
    is dropped and the scan restarts at 18 bases.

    A candidate ``pr`` is accepted when all of the following hold, checked in
    this order:

    * ``GC(pr)`` is strictly between 40 and 60;
    * ``GCend(pr)`` is not 0;
    * ``mt.Tm_Wallace(pr)`` lies in the closed range 56..60;
    * the third element of the ``SWalig`` result for ``pr`` against its
      reversed string contains none of ``"****"``, ``"***-**"``, ``"**-***"``,
      and ``self_dimers(pr)`` is not 1;
    * ``hairpin(pr)`` is not 0.

    Args:
        seq: Sequence to scan; it must support slicing and ``len``.

    Returns:
        The first accepted candidate.

    Raises:
        ValueError: If ``seq`` is exhausted before any candidate is accepted.
            Without this guard the scan would keep slicing an empty sequence
            and never terminate.
    """
    min_len = 18
    max_len = 25
    length = min_len
    while True:
        if length == max_len + 1:
            # Every length failed at this start: slide one base to the right.
            length = min_len
            seq = seq[1:]
        if len(seq) == 0:
            raise ValueError("no acceptable primer found: sequence exhausted")

        pr = seq[:length]

        gc_percent = GC(pr)
        if gc_percent >= 60 or gc_percent <= 40:
            length += 1
            continue

        if GCend(pr) == 0:
            length += 1
            continue

        tm = mt.Tm_Wallace(pr)
        if tm > 60 or tm < 56:
            length += 1
            continue

        pr_revers = pr[::-1]
        complalig = SWalig(SWvlmtrx(pr, pr_revers), pr, pr_revers)
        match_line = complalig[2]
        # Evaluation order matches the original chain, so self_dimers() is
        # still skipped when the first pattern is already found.
        if (match_line.find("****") >= 0
                or self_dimers(pr) == 1
                or match_line.find("***-**") >= 0
                or match_line.find("**-***") >= 0):
            length += 1
            continue

        if hairpin(pr) == 0:
            length += 1
            continue

        return pr
def select_primers(circRNA_seq):
    """Print candidate primer pairs for offsets 7 through 13.

    For each offset the function prints, in order: the offset, the forward
    primer with its Wallace Tm, the reverse primer with its Wallace Tm, the
    joined tail+head sequence the reverse primer is derived from, and the
    absolute Tm difference between the two primers.

    Args:
        circRNA_seq: Sequence to pick primers from. Negative slice indices
            are used, so positions are counted from the end of the sequence.

    Returns:
        None; all results are written with ``print``.
    """
    for offset in range(7, 14):
        print(offset)
        head = circRNA_seq[:offset]
        tail = circRNA_seq[offset - 20:]
        fwd_start = offset - 150
        forward_primer = Seq(circRNA_seq[fwd_start:fwd_start + 20])

        # End of the sequence joined to its start; presumably this spans the
        # circular junction - confirm against the caller.
        joined = Seq(tail + head)
        reverse_primer = joined.reverse_complement()

        print(forward_primer, mt.Tm_Wallace(forward_primer))
        print(reverse_primer, mt.Tm_Wallace(reverse_primer))
        print(joined)
        tm_gap = (mt.Tm_Wallace(forward_primer)
                  - mt.Tm_Wallace(reverse_primer))
        print(abs(tm_gap))
Example #3
0
def repeat_finder(seq):
    '''Find the longest self-repeat in a sequence and report its position.

    The upper-cased sequence is compared with a copy of itself shifted by
    1, 2, ... len-1 positions. The longest run of consecutive positions where
    both copies agree is the repeat; on ties the first run found wins.

    Runs are tracked separately for every shift and a run that reaches the
    end of a shift is counted. Previously a run could leak into the next
    shift and a trailing run was never recorded, so e.g. "AAAA" produced no
    repeat at all.

    Args:
        seq: Sequence to scan (anything with ``upper()``).

    Returns:
        ``(location, length)`` where ``location`` is the first index at which
        the repeat occurs in the upper-cased sequence, when the repeat is
        longer than 20 bases, or is at most 20 bases and its Wallace Tm
        (truncated to int) is above 55. ``False`` otherwise. A repeat of
        exactly 20 bases used to be rejected unconditionally; it is now
        subject to the Tm test like shorter repeats.
    '''
    string = str(seq.upper())
    size = len(string)

    best_start = 0
    best_len = 0
    for shift in range(1, size):
        run_len = 0
        for i in range(size - shift):
            if string[i] == string[i + shift]:
                run_len += 1
                if run_len > best_len:
                    best_len = run_len
                    best_start = i - run_len + 1
            else:
                run_len = 0

    repeat_sequence = string[best_start:best_start + best_len]
    length = len(repeat_sequence)
    if length == 0:
        return False

    # Long repeats are always reported; short ones only if they melt high.
    if length <= 20 and int(mt.Tm_Wallace(Seq(repeat_sequence))) <= 55:
        return False

    location = string.find(repeat_sequence)
    logger.debug("Found long {}bp repeat at location {}".format(
        length, location))
    return location, length
Example #4
0
def get_quick_interaction_score(domains, domains_c, row, T=25, magnesium=0, sodium=1.0, tm_target=60):
    """Score how well ``domains[row]`` binds its own complement versus the rest.

    The duplex energy of ``domains[row]`` is computed against every entry of
    ``domains_c``. The entry at index ``row`` is taken as the intended target;
    all other entries are summed as cross-reactions.

    The energy vector is sized and iterated over ``domains_c``. The previous
    code used ``len(domains)`` as the bound while indexing ``domains_c``,
    which raised IndexError when ``domains_c`` was shorter and silently
    ignored the extra complements when it was longer.

    Args:
        domains: Indexable collection of domain sequences.
        domains_c: Indexable collection of complement sequences;
            ``domains_c[row]`` is treated as the target of ``domains[row]``.
        row: Index of the domain being scored.
        T, magnesium, sodium: Forwarded unchanged to ``get_duplex_energy``.
        tm_target: Desired Wallace Tm; the distance from it scales the penalty.

    Returns:
        ``target_score - cross_reactions * longest(d) * freq(d) * tm_penalty``
        where ``d`` is ``domains[row]`` and ``tm_penalty`` is
        ``1 + |tm_target - Tm_Wallace(d)|``.
    """
    probe = domains[row]

    energies = np.zeros(len(domains_c))
    for i, complement in enumerate(domains_c):
        energies[i] = get_duplex_energy(
            [probe, complement], T=T, magnesium=magnesium, sodium=sodium)

    target_score = energies[row]
    cross_reactions = np.sum(np.delete(energies, row))

    tm_wallace = mt.Tm_Wallace(probe)
    tm_penalty = 1 + np.abs(tm_target - tm_wallace)

    # Only the cross-reaction term is scaled by the sequence penalties.
    quick_score = target_score - (cross_reactions
                                  * longest(probe)
                                  * get_most_represented_base_freq(probe)
                                  * tm_penalty)
    return quick_score
Example #5
0
 def calc_tm_value(self):
     """Compute the primer pair's mean Tm by three methods and tabulate them.

     Reads ``self.primer_fw`` and ``self.primer_rv`` and sets:
     ``primer_fw_seq``, ``primer_rv_seq`` (upper-cased ``Seq`` objects),
     ``tm_value_Wallace``, ``tm_value_GC``, ``tm_value_NN`` (mean of the
     forward and reverse primer Tm for each method), ``tm_table_column``,
     ``tm_list`` (method label and Tm rounded to one decimal) and
     ``tm_table`` (a DataFrame built from ``tm_list``).
     """
     fw_seq = Seq(self.primer_fw.upper())
     self.primer_fw_seq = fw_seq
     rv_seq = Seq(self.primer_rv.upper())
     self.primer_rv_seq = rv_seq
     primer_pair = (fw_seq, rv_seq)

     # Wallace rule
     self.tm_value_Wallace = np.mean(
         [mt.Tm_Wallace(primer) for primer in primer_pair])
     # GC-content method
     self.tm_value_GC = np.mean(
         [mt.Tm_GC(primer, Na=50, valueset=7) for primer in primer_pair])
     # Nearest-neighbour method
     self.tm_value_NN = np.mean(
         [mt.Tm_NN(primer, Na=50, nn_table=mt.DNA_NN1)
          for primer in primer_pair])

     self.tm_table_column = ["計算手法", "Tm値 (°C)"]
     labelled_means = (
         ("Wallace法", self.tm_value_Wallace),
         ("GC法", self.tm_value_GC),
         ("最近接塩基法", self.tm_value_NN),
     )
     self.tm_list = [[label, round(mean_tm, 1)]
                     for label, mean_tm in labelled_means]
     self.tm_table = pd.DataFrame(self.tm_list,
                                  columns=self.tm_table_column)
Example #6
0
def gen_nested_primers(end3, end5):
    """Pick a compatible forward/reverse primer pair from two sequences.

    Phase 1 collects up to 10 candidates per side by calling ``nested`` on
    the remaining part of each sequence and then cutting the sequence just
    after the primer found. Each failure lowers the target count by one, so
    the loop ends when the number of collected pairs equals the target.

    Phase 2 returns the first (reverse, forward) combination for which:

    * the Wallace Tm values differ by at most 3;
    * ``compare(reverse, forward)`` is not 0;
    * the sequence remaining after the forward primer in ``end3`` is neither
      more than 4x longer nor more than 4x shorter than the sequence
      remaining after the reverse primer in ``end5``.

    Args:
        end3: Sequence searched for forward primers.
        end5: Sequence searched for reverse primers.

    Returns:
        ``(prF, prR, end3, end5)`` where the two sequences are cut to start
        3 bases before the end of the chosen primer, or ``None`` when no
        combination qualifies.
    """
    list_prF = []
    list_prR = []
    re_end3 = end3
    re_end5 = end5
    n = 10
    maxa = 0
    while maxa != n:
        try:
            prF = nested(re_end3)
            prR = nested(re_end5)
            list_prF.append(prF)
            list_prR.append(prR)
            re_end3 = re_end3[re_end3.find(prF) + len(prF):]
            re_end5 = re_end5[re_end5.find(prR) + len(prR):]
            maxa += 1
        except Exception:
            # Best effort: any failure just lowers the target. A bare
            # ``except:`` here also trapped KeyboardInterrupt/SystemExit.
            n -= 1

    for prR in list_prR:
        tm_reverse = mt.Tm_Wallace(prR)
        for prF in list_prF:
            if abs(tm_reverse - mt.Tm_Wallace(prF)) > 3:
                continue
            if compare(prR, prF) == 0:
                continue

            cut3 = end3.find(prF) + len(prF)
            cut5 = end5.find(prR) + len(prR)
            rest3_len = len(end3[cut3:])
            rest5_len = len(end5[cut5:])
            if rest3_len * 4 < rest5_len or rest3_len > 4 * rest5_len:
                continue

            # Keep the last 3 bases of each primer in the returned sequence.
            end3 = end3[cut3 - 3:]
            end5 = end5[cut5 - 3:]
            return (prF, prR, end3, end5)

    return None
Example #7
0
 def _create_staple_max_melt_T(self) -> Dict[Strand, float]:
     """Map each staple to the highest melting temperature of its domains.

     Domains whose sequence contains "N" are ignored, so a staple without
     any usable domain has no entry in the result. Domains with more than
     14 bases use the nearest-neighbour model (``Na=0, Mg=17.5``); shorter
     ones use the Wallace rule.
     """
     melt_ts_by_staple: Dict[Strand, List[float]] = {}
     for staple in self.staples:
         for domain in staple.domain_list:
             if "N" in domain.sequence:
                 continue
             if len(domain.base_list) > 14:
                 melt_t = MeltingTemp.Tm_NN(
                     Seq(domain.sequence), Na=0, Mg=17.5)
             else:
                 melt_t = MeltingTemp.Tm_Wallace(Seq(domain.sequence))
             melt_ts_by_staple.setdefault(staple, []).append(melt_t)

     return {staple: max(melt_ts)
             for staple, melt_ts in melt_ts_by_staple.items()}
def melt_temp_dict():
    """Build a trimer -> Wallace melting temperature lookup.

    Covers every 3-letter combination of A, C, G, T and N. Trimers made only
    of A/C/G/T map to ``mt.Tm_Wallace(Seq(trimer))``; trimers containing an
    N map to the string "NA". The A/C/G/T entries come first in the dict.
    """
    trimers = ["".join(bases) for bases in product("ACGTN", repeat=3)]

    tm_by_trimer = {
        trimer: mt.Tm_Wallace(Seq(trimer))
        for trimer in trimers if "N" not in trimer
    }
    tm_by_trimer.update(
        {trimer: "NA" for trimer in trimers if "N" in trimer})
    return tm_by_trimer
Example #9
0
def Oligo(target_dna):
    '''
    Summarise an oligo as a dict.

    Keys: 'GC_contents' (GC percentage formatted as "xx.xx %"), 'Tm_value'
    (Wallace Tm), 'Complement_seq', 'Reverse_complement_seq' (both as str)
    and 'Length_of_oligo' (length as str).
    '''
    dna = Seq(target_dna)  # work on a Biopython Seq
    return {
        'GC_contents': '{:.2f} %'.format(GC(dna)),
        'Tm_value': MeltingTemp.Tm_Wallace(dna),
        'Complement_seq': str(dna.complement()),
        'Reverse_complement_seq': str(dna.reverse_complement()),
        'Length_of_oligo': str(len(dna)),
    }
Example #10
0
def melting_temp(input_str):
    """Return the Wallace-rule Tm of ``input_str`` wrapped as unambiguous DNA.

    NOTE(review): passing an ``IUPAC`` alphabet to ``Seq`` presumably ties
    this to an older Biopython release - confirm the pinned version.
    """
    return mt.Tm_Wallace(Seq(input_str, IUPAC.unambiguous_dna))
Example #11
0
def main(sequences):
    """Generate and print gRNA cloning oligos for every exon of every label.

    Written for Python 2 (print statements, ``xrange``, integer ``/``).

    ``sequences`` maps a label to a dict with an 'exons' list and an
    'introns' list; each introns entry is indexed with [0] (sequence placed
    before the exon) and [1] (sequence placed after it).

    For each exon, the flanked sequence and its reverse complement are
    scanned for 'GG'. The 19 bases ending one position before each hit form
    a site; sites whose 'G' + site Wallace Tm is at most ``mt_threshold``
    are skipped. Every accepted site yields two oligos (overlap5 + 'G' +
    site, and the reverse complement of 'G' + site + overlap3).

    The results are only printed; nothing is returned. The printing loops
    hard-code the labels 'A', 'B1' and 'B2', so ``sequences`` must contain
    those keys or a KeyError is raised.
    """

    # container for good oligos
    my_oligos = {}
    my_sites = {}
    my_names = {}

    labels = sequences.keys()

    # annealing overlaps
    #overlap5 = 'cttgtggaaaggacgaaacacc'.upper() # TM: 58-59 C
    #overlap3 = 'gttttagagctagaaatagcaagttaaaataaggc'.upper() # TM: 58-59 C

    # pcr overlaps
    overlap5 = 'TTTCTTGGCTTTATATATCTTGTGGAAAGGACGAAACACC'.upper(
    )  # TM: 58-59 C
    overlap3 = 'GTTTTAGAGCTAGAAATAGCAAGTTAAAATAAGGCTAGTC'.upper(
    )  # TM: 58-59 C

    # Sites at or below this Wallace Tm are rejected.
    mt_threshold = 55.0
    # Positions skipped after an accepted site before scanning resumes.
    gRNA_minimum_spacing = 5
    # First exon position that is scanned.
    gRNA_5prime_spacing = 10

    print 'Starting oligo generation...'

    for label in labels:

        print 'Starting generation for {}...'.format(label)
        my_oligos[label] = []
        my_sites[label] = []
        my_names[label] = []

        # Running site number for this label; shared by both read directions.
        name_index = 1

        for exon, intron in zip(sequences[label]['exons'],
                                sequences[label]['introns']):

            full_seq = intron[0] + exon + intron[1]
            full_seq_rev = Seq(full_seq).reverse_complement()
            shift = len(intron[0])  # shift for indexing correct regions
            shift_rev = len(intron[1])  # shift for indexing correct regions

            # Forward read

            ind = gRNA_5prime_spacing
            while ind < len(exon):
                # 'GG' at the current exon position (presumably the GG of a
                # PAM - confirm).
                if full_seq[shift + ind:shift + ind + 2] == 'GG':
                    # 19 bases ending one position before the 'GG'.
                    site = full_seq[ind + shift - 20:ind + shift - 1]
                    oligo = Seq(overlap5 + 'G' + site)
                    oligo_rev = (Seq('G') + site +
                                 overlap3).reverse_complement()
                    # An empty slice means there was no usable site here.
                    if site == '':
                        ind += 1
                        continue
                    if mt.Tm_Wallace('G' + site) <= mt_threshold:
                        ind += 1
                        continue

                    my_oligos[label].append(oligo)
                    my_sites[label].append(site)
                    my_names[label].append('gRNA-TCR{}C-{}F'.format(
                        label, name_index))
                    my_oligos[label].append(oligo_rev)
                    my_sites[label].append(site)
                    my_names[label].append('gRNA-TCR{}C-{}F-RC'.format(
                        label, name_index))
                    name_index += 1
                    ind += gRNA_minimum_spacing
                else:
                    ind += 1

            # Reverse read

            ind = gRNA_5prime_spacing
            while ind < len(exon):
                if full_seq_rev[shift_rev + ind:shift_rev + ind + 2] == 'GG':
                    site = full_seq_rev[shift_rev + ind - 20:shift_rev + ind -
                                        1]
                    #oligo = overlap5 + 'G' + site + overlap3
                    oligo = overlap5 + 'G' + site
                    oligo_rev = (Seq('G') + site +
                                 overlap3).reverse_complement()
                    if site == '':
                        ind += 1
                        continue
                    if mt.Tm_Wallace('G' + site) <= mt_threshold:
                        ind += 1
                        continue

                    my_oligos[label].append(oligo)
                    my_sites[label].append(site)
                    my_names[label].append('gRNA-TCR{}C-{}R'.format(
                        label, name_index))
                    my_oligos[label].append(oligo_rev)
                    my_sites[label].append(site)
                    # NOTE(review): the reverse-read RC oligo is named
                    # '...F-RC' although its partner is '...R'; possibly a
                    # copy-paste slip for '...R-RC' - confirm before changing.
                    my_names[label].append('gRNA-TCR{}C-{}F-RC'.format(
                        label, name_index))
                    name_index += 1
                    ind += gRNA_minimum_spacing
                else:
                    ind += 1

            #raw_input()

        # Two oligos are stored per accepted site, hence the division by 2.
        print 'Gene {}: {} sites'.format(label, len(my_oligos[label]) / 2)

    # 96 well names: 'A1' .. 'A12', 'B1' .. 'H12'.
    wells = ['{}{}'.format(a, b) for a in 'ABCDEFGH' for b in xrange(1, 13)]

    #'''#
    # Plate layout: one line per oligo; the plate number advances every 96.
    for label in ['A', 'B1', 'B2']:
        print '\nPlate; Well; Name; Sequence; MT (Wallace)'
        i = 0
        for name, site, oligo in zip(my_names[label], my_sites[label],
                                     my_oligos[label]):
            #print '{}; {}; {}'.format(wells[i%96],name,oligo)
            print 'TCR{}-{}; {}; {}; {}; {}'.format(label, 1 + (i / 96),
                                                    wells[i % 96], name, oligo,
                                                    mt.Tm_Wallace('G' + site))
            i += 1
    #'''#

    print 'HERRREE'

    # Number of duplicate oligos across B1 and B2, halved (under Python 2 the
    # print statement outputs the whole quotient).
    B_oligos = my_oligos['B1'] + my_oligos['B2']
    print(len(B_oligos) - len(list(set(B_oligos)))) / 2

    #'''#
    # Site summary: one line per site, skipping the '-RC' partner oligos.
    for label in ['A', 'B1', 'B2']:
        print '\nNew plate:'
        # NOTE(review): i is reset here but never used in this loop.
        i = 0
        for name, site, oligo in zip(my_names[label], my_sites[label],
                                     my_oligos[label]):
            if '-RC' in name:
                continue
            print '{}; {}; TM:{}'.format(name, site, mt.Tm_Wallace('G' + site))
Example #12
0
    tag_prefix = 'o'
    default_strandedness = 1

    def get_melting_temp(self):
        """Return the oligo's melting temperature as a float.

        A Tm written into ``self.name`` (e.g. ``_TM60`` or ``-Tm=58``, whole
        digits only) takes precedence. Otherwise the Wallace rule is applied
        to ``self.seq``.
        """
        from Bio.SeqUtils import MeltingTemp

        encoded_tm = re.search(r'[-_ ](TM|Tm|tm)=?(\d+)', self.name)
        if encoded_tm is not None:
            return float(encoded_tm.group(2))

        # The Wallace rule isn't particularly accurate, but it was chosen
        # because it agrees most closely with NEB's Tm calculator, which is
        # what the author has been using for everything.
        return MeltingTemp.Tm_Wallace(self.seq)

    def get_tm(self):
        """Return ``self.melting_temp`` under a shorter name.

        NOTE(review): ``melting_temp`` is not defined in the visible code;
        presumably it is generated from ``get_melting_temp`` - confirm.
        """
        melting_temp = self.melting_temp
        return melting_temp


class MakerInterface:
    # Maker classes are not required to actually inherit from this class, but
    # they are expected to implement this interface.

    # Note that I don't use autoprop on this class, because I don't want
    # getters to be part of the interface.  Subclasses are free to use
    # autoprop, though.

    @classmethod
    def make(self, db, products):
Example #13
0
from Bio.Seq import Seq
from Bio.SeqUtils import GC
from Bio.SeqUtils import MeltingTemp as mt
# Demo: GC percentage and Wallace Tm of a short sequence, one value per line.
seq = Seq("AAGTGACAGGGATTG")
GC_per, mt_p = GC(seq), mt.Tm_Wallace(seq)
print(GC_per, mt_p, sep="\n")
Example #14
0
    end3 = end3[:n]
    counter = 0
    with open('file.txt', 'a') as file:
        file.write(i + '\n')
    while counter != 3:
        primers_and_seq = primers(end3, end5)
        prF = primers_and_seq[0]
        prR = primers_and_seq[1]
        end3 = primers_and_seq[2]
        end5 = primers_and_seq[3]
        if counter == 0:
            prF1 = prF
            prR1 = prR
        elif counter == 1:
            prF2 = prF
            prR2 = prR
        elif counter == 2:
            prF3 = prF
            prR3 = prR
        counter += 1
        with open('file.txt', 'a') as file:
            file.write(' >Forward primer ' + str(counter) + ': ' + '\n' +
                       str(prF) + ': Melting temperature' +
                       '%0.2f' % mt.Tm_Wallace(prF) + '\n' +
                       '>Revers primer ' + str(counter) + ': ' + '\n' +
                       str(prR) + ': Melting temperature' +
                       '%0.2f' % mt.Tm_Wallace(prR) + '\n')
    with open('file.txt', 'a') as file:
        file.write(
            rcr_vis(prF1, prF2, prF3, prR1, prR2, prR3, pcr_end3, pcr_end5))
Example #15
0
def temperatures(dic):
    """Return the Wallace, GC and nearest-neighbour Tm of ``dic``.

    Each method is called with ``strict=False`` and its result is rounded to
    two decimals. The values are returned as a ``(Tw, Tgc, Tnn)`` tuple.
    """
    tm_methods = (mt.Tm_Wallace, mt.Tm_GC, mt.Tm_NN)
    Tw, Tgc, Tnn = (round(method(dic, strict=False), 2)
                    for method in tm_methods)
    return Tw, Tgc, Tnn
Example #16
0
if __name__ == '__main__':
    # Print a TSV of seq_id, Tm_Wallace, Tm_GC and Tm_NN for every record in
    # the input file, or in stdin when no file is given.
    args = parse_args()

    file, seq_format, fh = args.infile, args.format, None
    if file:
        if not seq_format:
            # No explicit format: infer it from the file name suffix.
            found = re.search(r'(?i)(fasta|fa|fastq|fq)(\.gz)?$', file)
            if not found:
                print(
                    "invalid file name suffix.\nfile name should like this: infile.[fasta|fa|fastq|fq][.gz]",
                    file=sys.stderr)
                sys.exit(1)
            # The match is case-insensitive but SeqIO format names are lower
            # case, so normalise before mapping the short aliases.
            seq_format = found.group(1).lower()
            seq_format = {'fa': 'fasta', 'fq': 'fastq'}.get(
                seq_format, seq_format)

        # Same case-insensitivity as the suffix match above.
        if file.lower().endswith('.gz'):
            fh = gzip.open(file, 'rt')
        else:
            fh = open(file, 'r')
    else:
        fh = sys.stdin
        seq_format = args.format

    try:
        sys.stdout.write('{}\t{}\t{}\t{}\n'.format('seq_id', 'Tm_Wallace',
                                                   'Tm_GC', 'Tm_NN'))
        for seq in SeqIO.parse(fh, seq_format):
            sys.stdout.write('{}\t{:0.2f}\t{:0.2f}\t{:0.2f}\n'.format(
                seq.id, mt.Tm_Wallace(seq.seq), mt.Tm_GC(seq.seq),
                mt.Tm_NN(seq.seq)))
    finally:
        # Release the handle even when parsing fails.
        fh.close()
#!/usr/bin/env python

import sys
from Bio.SeqUtils import molecular_weight
from Bio.SeqUtils import MeltingTemp as mt

# Log the interpreter version and the input path to stderr.
print("python: " + sys.version, file=sys.stderr)
print(sys.argv[1], file=sys.stderr)

# Read a tab-separated file whose 4th column holds a sequence, append three
# melting-temperature columns and print each row comma-separated.
with open(sys.argv[1]) as file:
    for line in file:
        row = line.rstrip('\n').split("\t")
        seq = row[3]

        if seq != 'cdna':
            # Result is unused; the call is kept from the original script.
            mw = molecular_weight(seq, 'DNA', False)
            new_columns = [
                '%0.2f' % tm
                for tm in (mt.Tm_NN(seq), mt.Tm_GC(seq), mt.Tm_Wallace(seq))
            ]
        else:
            # Header row: the sequence column is literally named 'cdna'.
            new_columns = ["tm_nn", "tm_gc", "tm_wallace"]

        row.extend(new_columns)
        print(",".join(row))
1/1
  A  L  K  C  V
 C  L  E  M  C
M  P  *  N  V  *
atgccttgaaatgtgtag   38 %
tacggaactttacacatc
G  Q  F  T  Y 
 H  R  S  I  H  L
  A  K  F  H  T
"""

# 9: calculating Tm with the Wallace rule

# NOTE: Seq is used below but its import is commented out here, so it has to
# be in scope already.
# from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt

myseq = Seq("AGTCTGGGACGCGGCAATCGCA")
wallace_tm = mt.Tm_Wallace(myseq)
print(wallace_tm)  # 72.0

# 10: amino acid codes, three-letter to one-letter and back

from Bio.SeqUtils import seq1

essential_amino_acid_3 = "LeuLysMetValIleThrTrpPhe"
one_letter_codes = seq1(essential_amino_acid_3)
print(one_letter_codes)  # LKMVITWF

from Bio.SeqUtils import seq3

essential_amino_acid_1 = "LKMVITWF"
three_letter_codes = seq3(essential_amino_acid_1)
print(three_letter_codes)  # LeuLysMetValIleThrTrpPhe
Example #19
0
# 4.5.4.calc_melting_temperature.py
from Bio.SeqUtils import MeltingTemp as mt
from Bio.Seq import Seq

# Wallace-rule melting temperature of a 25-base sequence.
myseq = Seq("AGTCTGGGACGGCGCGGCAATCGCA")
wallace_tm = mt.Tm_Wallace(myseq)
print(wallace_tm)  # prints 84.0
Example #20
0
def getTmWallace(seq):
    """Return the melting temperature of ``seq`` from ``mt.Tm_Wallace``."""
    wallace_tm = mt.Tm_Wallace(seq)
    return wallace_tm