Esempio n. 1
0
def get_thermo(dict, guide_sequence, context_sequence):
    """Store nearest-neighbor melting temperatures in the given mapping.

    Four entries are written into ``dict`` (which is also returned):
    ``'Tm, context'`` for the whole ``context_sequence``, and three entries
    for fixed windows of ``guide_sequence``: the last five bases, bases
    2..6, and everything between index 7 and the last five bases.
    """
    dict['Tm, context'] = MeltingTemp.Tm_NN(context_sequence)

    # (feature key, guide window) pairs, evaluated in this order.
    guide_windows = (
        ('Tm, 5mer-15', guide_sequence[-5:]),
        ('Tm, 5mer-3', guide_sequence[2:7]),
        ('Tm, middle', guide_sequence[7:-5]),
    )
    for feature_key, window in guide_windows:
        dict[feature_key] = MeltingTemp.Tm_NN(window)
    return dict
Esempio n. 2
0
def evalPrimerPairMT(fprimer, rprimer, ret_mt=False):
    """Check whether a primer pair has acceptable melting temperatures.

    The optimal melting temperature of the primers is 60-64°C, with an ideal
    temperature of 62°C, which is based on typical cycling and reaction
    conditions and the optimum temperature for PCR enzyme function. Both
    primers should melt at similar temperatures so that they bind
    simultaneously and amplify the product efficiently.

    PCR parameters used are from IDT: Oligo 0.2 uM, Na 50 mM, Mg 3 mM,
    dNTPs 0.8 mM.

    The pass/fail decision uses the GC-based estimate (``Tm_GC``) only: the
    two values must differ by at most 3°C and both must lie in [60, 64].
    The nearest-neighbor estimate (``Tm_NN``) is computed and printed for
    information but does not take part in the decision.

    :param fprimer: forward primer sequence
    :param rprimer: reverse primer sequence
    :param ret_mt: when true, return the two GC-based temperatures instead
        of ``True`` for a passing pair
    :return: ``False`` if the pair fails; otherwise ``True`` or, with
        ``ret_mt``, the tuple ``(fprimer_MT, rprimer_MT)``
    """
    salt = dict(Na=50, Mg=3, dNTPs=0.8)

    fprimer_MT = MeltingTemp.Tm_GC(fprimer, **salt)
    rprimer_MT = MeltingTemp.Tm_GC(rprimer, **salt)

    fprimer_MT_NN = MeltingTemp.Tm_NN(fprimer, **salt)
    # The reverse primer's NN value must be computed from the reverse primer
    # (it was previously computed from the forward primer by mistake).
    rprimer_MT_NN = MeltingTemp.Tm_NN(rprimer, **salt)

    print(
        f"forw primer: {fprimer}\nforw primer MT: {fprimer_MT} {fprimer_MT_NN} \n"
        f"rev  primer: {rprimer}\nrev primer MT : {rprimer_MT} {rprimer_MT_NN} \n"
    )

    # Filter for primers that meet the MT standards.
    within_tolerance = math.fabs(fprimer_MT - rprimer_MT) <= 3
    within_window = (max(fprimer_MT, rprimer_MT) <= 64
                     and min(fprimer_MT, rprimer_MT) >= 60)
    if not (within_tolerance and within_window):
        print("MT for the primer pairs did not meet standards\n")
        return False

    print("MT of primer pair passed.\n")
    if not ret_mt:
        return True
    return fprimer_MT, rprimer_MT
def grow_overlap(startpoint, seq):
    """
    Return the sequence divided in three parts: 5', overlap and 3'.

    The overlap grows from ``startpoint`` outwards, one base on each side per
    iteration, until its Tm reaches ``minmelt`` and its GC% lies in [40, 60],
    or until its half-length reaches ``args.maxoverlaplen``. The oligo length
    limit is not checked here; per the original notes that is done
    afterwards by the caller.

    Reads ``minmelt`` and ``args`` (``MgmM``, ``dNTPsmM``, ``maxoverlaplen``)
    from the enclosing module scope.

    Returns the tuple ``(firsthalf, overlap, secondhalf)``; concatenating the
    three parts reproduces ``seq`` (asserted before returning).
    """
    # Minimum overlap length is Tm/4, which assumes the overlap is 100% GC.
    # Floor division keeps every slice offset an integer on Python 3 too
    # (true division would produce floats and make the slices fail).
    half_len = int(minmelt // 4) // 2

    def overlap_stats(fragment):
        # Tm and GC% of the current overlap under the configured salt conditions.
        fragment_tm = MeltingTemp.Tm_NN(fragment, Na=50, K=0, Tris=0,
                                        Mg=args.MgmM, dNTPs=args.dNTPsmM)
        return fragment_tm, GC(fragment)

    overlap = seq[startpoint - half_len:startpoint] + seq[startpoint:startpoint + half_len]
    counter = 0
    tm, gc = overlap_stats(overlap)
    # GC% must be between 40 and 60, Tm should be above the minimum, and the
    # overlap half-length must stay below args.maxoverlaplen.
    while (half_len + counter < args.maxoverlaplen) and (tm < minmelt or gc < 40 or gc > 60):
        counter += 1
        # Extend by one base on the left and one base on the right.
        overlap = (seq[startpoint - half_len - counter]
                   + overlap
                   + seq[startpoint + half_len + counter - 1])
        tm, gc = overlap_stats(overlap)

    firsthalf = seq[:(startpoint - half_len - counter)]
    secondhalf = seq[(startpoint + half_len + counter):len(seq)]
    assert len(firsthalf + overlap + secondhalf) == len(seq)
    assert firsthalf + overlap + secondhalf == seq
    return firsthalf, overlap, secondhalf
Esempio n. 4
0
def get_thermo(dict, guide_sequence, context_sequence):
    """Store nearest-neighbor melting temperatures in the given mapping.

    Writes ``'Tm, context'`` for the whole ``context_sequence`` and
    ``'Tm, start'``, ``'Tm, mid'``, ``'Tm, end'`` for the guide split at one
    third and two thirds of its length (integer division; the last part
    takes any remainder). The same mapping is returned.
    """
    dict['Tm, context'] = MeltingTemp.Tm_NN(context_sequence)

    step = len(guide_sequence) // 3
    first_cut, second_cut = step, 2 * step
    dict['Tm, start'] = MeltingTemp.Tm_NN(guide_sequence[:first_cut])
    dict['Tm, mid'] = MeltingTemp.Tm_NN(guide_sequence[first_cut:second_cut])
    dict['Tm, end'] = MeltingTemp.Tm_NN(guide_sequence[second_cut:])
    return dict
def possible_reverse_seq(seq_d, n, seq_length, mt_min, mt_max, na, tris, mg,
                         dntps, saltcor):
    """Return the window of ``seq_d`` starting at ``n`` if it qualifies.

    The window ``seq_d[n:n + seq_length]`` qualifies when it has the full
    requested length, starts with 'C' or 'G', and its nearest-neighbor Tm
    (computed with the given salt parameters) lies strictly between
    ``mt_min`` and ``mt_max``.

    Returns the window as ``str``, or ``None`` when it does not qualify.
    A window that runs past the end of ``seq_d`` (including an empty one)
    is rejected instead of raising ``IndexError``.
    """
    window = seq_d[n:n + seq_length]
    # Check the length before touching window[0]: the slice is empty when n
    # is at or beyond the end of the sequence.
    if not window or len(window) != seq_length:
        return None
    if window[0] not in ('C', 'G'):
        return None

    # Compute the Tm once and reuse it for both bounds.
    window_tm = mt.Tm_NN(window, Na=na, Tris=tris, Mg=mg, dNTPs=dntps,
                         saltcorr=saltcor)
    if mt_min < window_tm < mt_max:
        return str(window)
    return None
Esempio n. 6
0
def seqProbes(mb_seq, mb_size, mb_sscount, probe):
    """Build candidate probes from ``mb_seq`` and collect per-probe data.

    The target is split with ``itersplit_into_x_chunks``; each chunk yields a
    reverse-complement probe and a "parallel" probe, and the Tm of both is
    computed with the RNA nearest-neighbor table and truncated to ``int``.

    Reads ``mb_bases`` and ``mb_so`` from the enclosing module scope.

    Returns ``(jl, perl, sumsl, basesl, Tml, Tmp, basesp)``: 1-based chunk
    numbers, per-chunk base percentages, per-chunk ss-count averages, the
    reverse-complement probes, their Tm values, the parallel-probe Tm
    values, and the parallel probes.
    """
    def rna_tm(oligo):
        # Nearest-neighbor Tm with the fixed probe conditions, truncated to int.
        return int(mt.Tm_NN(oligo,
                            dnac1=50000,
                            dnac2=50000,
                            Na=100,
                            nn_table=mt.RNA_NN1,
                            saltcorr=1))

    target_chunks = list(itersplit_into_x_chunks(mb_seq, mb_size, probe))
    basesl = [reverse_complement(chunk) for chunk in target_chunks]
    basesp = [parallel_probe(chunk) for chunk in target_chunks]

    Tml = [rna_tm(oligo) for oligo in basesl]
    # list of lists of each base for each probe
    result_basesa = list(itersplit_into_x_chunks(mb_bases, mb_size, probe))

    Tmp = [rna_tm(oligo) for oligo in basesp]
    # Same split again; the result is not used further in this function.
    list(itersplit_into_x_chunks(mb_bases, mb_size, probe))

    # NOTE(review): the original comment calls this the percent of Gs and Cs,
    # but the code counts 'A' and 'G' - confirm which is intended.
    perl = [
        int((bases.count('A') + bases.count('G')) / probe * 100)
        for bases in result_basesa
    ]
    jl = list(range(1, len(result_basesa) + 1))

    sscount_chunks = list(
        itersplit_into_x_chunks(mb_sscount, len(mb_sscount), probe))
    sumsl = [
        sum(list(map(int, counts))) / (probe * mb_so)
        for counts in sscount_chunks
    ]

    # put together all data as indicated in header
    return (jl, perl, sumsl, basesl, Tml, Tmp, basesp)
Esempio n. 7
0
 def find_left_primer(self, seq, optimal_tm=54):
     """Return the prefix of ``seq`` whose Tm is closest to ``optimal_tm``.

     Prefixes of length 10 to 59 are considered; when ``seq`` is shorter than
     the length being tried, the whole sequence is added as the final
     candidate. Ties go to the shortest candidate.
     """
     candidates = []
     for prefix_len in range(10, 60):
         if prefix_len > len(seq):
             # Ran out of sequence: the full string is the last candidate.
             candidates.append([seq, abs(optimal_tm - mt.Tm_NN(Seq(seq)))])
             break
         prefix = seq[:prefix_len]
         candidates.append([prefix, abs(optimal_tm - mt.Tm_NN(Seq(prefix)))])
     # min() returns the first of equal minima, like a stable sort would.
     closest = min(candidates, key=lambda pair: pair[1])
     return closest[0]
Esempio n. 8
0
def featurize(data, exp_nm, seq_col):
  """Build the feature matrix and feature names for every row of ``data``.

  For each row a 31-base window (positions -9..21 relative to the index
  returned by ``_data.pos_to_idx_safe``) is cut from ``row[seq_col]`` and
  encoded as: one-hot features, dinucleotide features, base counts
  (A, C, G, T, G+C) and four nearest-neighbor melting temperatures.

  Returns ``(X, names)`` where ``X`` is a numpy array with one row per input
  row and ``names`` holds the matching feature names prefixed with ``x_``.
  """
  start_pos, end_pos = -9, 21   # go up to N in NGG
  feature_rows = []

  for _, record in data.iterrows():
    full_seq = record[seq_col]
    anchor = _data.pos_to_idx_safe(0, exp_nm, record['Name (unique)'])
    seq = full_seq[anchor + start_pos : anchor + end_pos + 1]
    assert len(seq) == 31

    vector = []

    # One hot encoding
    vector.extend(one_hot_encode(seq))

    # Dinucleotides
    vector.extend(dinucleotide_encode(seq))

    # Sum nucleotides
    base_counts = [seq.count(base) for base in ('A', 'C', 'G', 'T')]
    base_counts.append(seq.count('G') + seq.count('C'))
    vector.extend(base_counts)

    # Melting temp: whole window plus three sub-windows counted from the end
    from Bio.SeqUtils import MeltingTemp as mt
    tm_windows = (seq, seq[-5:], seq[-13:-5], seq[-21:-13])
    vector.extend([mt.Tm_NN(window) for window in tm_windows])

    # Store
    feature_rows.append(np.array(vector))

  ohe_nms = get_one_hot_encoder_nms(start_pos, end_pos)
  dint_nms = get_dinucleotide_nms(start_pos, end_pos)
  sum_nms = ['Num. A', 'Num. C', 'Num. G', 'Num. T', 'Num. GC']
  mt_nms = ['Tm full', 'Tm -5', 'Tm -13 to -5', 'Tm -21 to -13']
  all_nms = ohe_nms + dint_nms + sum_nms + mt_nms
  param_nms = ['x_%s' % (ft_nm) for ft_nm in all_nms]

  return (np.array(feature_rows), param_nms)
Esempio n. 9
0
def main():
    """Print the nearest-neighbor Tm of every sequence in ``seqarr``.

    All Tm_NN options (``check`` ... ``saltcorr``) and ``seqarr`` itself are
    read from the enclosing module scope and passed positionally. Each
    result is printed with two decimals.
    """
    for raw_sequence in seqarr:
        melting_temp = mt.Tm_NN(
            Seq(raw_sequence), check, strict, c_seq, shift, nn_table,
            tmm_table, imm_table, de_table, dnac1, dnac2, selfcomp, Na, K,
            Tris, Mg, dNTPs, saltcorr)
        print('%0.2f' % melting_temp)
Esempio n. 10
0
 def tm_func(primers):
     """Return the mean nearest-neighbor Tm over ``primers``.

     Each primer's Tm is computed with the salt parameters in ``tm_params``
     (taken from the enclosing scope). Several sequences arise when a primer
     contains ambiguous bases; their Tm values are averaged.
     """
     per_primer_tm = []
     for candidate in primers:
         per_primer_tm.append(MeltingTemp.Tm_NN(candidate, **tm_params))
     return np.mean(per_primer_tm)
Esempio n. 11
0
def gene_feature(Y, X, learn_options):
    '''
    Per-gene features: sequence length, GC content, DNA Tm and molecular
    weight of each target gene.

    The gene of every row is read from ``Y['Target gene']``; the sequence of
    each distinct gene is fetched once with ``util.get_gene_sequence`` and
    its values are written to all rows of that gene. ``X`` and
    ``learn_options`` are not used here.

    Returns a DataFrame indexed like ``Y['Target gene']`` with the columns
    'gene length', 'gene GC content', 'gene temperature' and
    'gene molecular weight'.
    '''
    gene_names = Y['Target gene']
    n_rows = gene_names.values.shape[0]

    # One column per feature, in the order of the output columns.
    feature_matrix = np.zeros((n_rows, 4))

    for gene in gene_names.unique():
        seq = util.get_gene_sequence(gene)
        rows_of_gene = gene_names.values == gene
        feature_matrix[rows_of_gene, 0] = len(seq)
        feature_matrix[rows_of_gene, 1] = SeqUtil.GC(seq)
        feature_matrix[rows_of_gene, 2] = Tm.Tm_NN(seq, rna=False)
        feature_matrix[rows_of_gene, 3] = SeqUtil.molecular_weight(seq, 'DNA')

    column_names = [
        'gene length', 'gene GC content',
        'gene temperature', 'gene molecular weight'
    ]
    return pandas.DataFrame(data=feature_matrix,
                            index=gene_names.index,
                            columns=column_names)
 def BedprobeTm(self, seq7):
     """Return the chemically corrected Tm of ``seq7`` for .bed output.

     The nearest-neighbor Tm (using ``self.sal``, ``self.conc1`` and
     ``self.conc2``) is rounded to two decimals, corrected with
     ``mt.chem_correction`` using ``self.form`` as ``fmd``, and returned as a
     string with two decimals.
     """
     raw_tm = mt.Tm_NN(seq7, Na=self.sal, dnac1=self.conc1, dnac2=self.conc2)
     rounded_tm = float('%0.2f' % raw_tm)
     corrected_tm = mt.chem_correction(rounded_tm, fmd=self.form)
     return '%0.2f' % corrected_tm
def get_max_domain_melt(dna_structure, staple_indices, scaffold_rotation, scaffold_id, print_staples):
    """Return, per staple, the highest domain melting temperature.

    ``staple_indices`` is iterated as strands -> domains -> base indices.
    Each base index is shifted by ``scaffold_rotation`` (modulo the scaffold
    length) and looked up in the scaffold sequence with every 'N' removed.
    Domains of a single base get a melting temperature of 0. The
    ``print_staples`` argument is not used.
    """
    # physical scaffold sequence and its length
    scaffold_sequence = get_sequence(dna_structure.strands[scaffold_id]).replace('N', '')
    scaffold_length = len(scaffold_sequence)

    def domain_melt(domain):
        # Collect the scaffold bases paired with this domain.
        bases = [
            scaffold_sequence[(baseindex + scaffold_rotation) % scaffold_length]
            for baseindex in domain
        ]
        if len(bases) > 1:
            # The bases come from the scaffold while the indices follow the
            # staple, so the sequence is reversed before computing the Tm.
            return MeltingTemp.Tm_NN(Seq(''.join(bases[::-1])))
        return 0.

    return [
        max([domain_melt(domain) for domain in strand])
        for strand in staple_indices
    ]
Esempio n. 14
0
def probeTm(seq1, saltConc, formConc):
    """Return the Tm of ``seq1`` under the given salt and formamide conditions.

    The nearest-neighbor Tm at ``Na=saltConc`` is rounded to two decimals,
    corrected with ``mt.chem_correction(fmd=formConc)`` and returned as a
    string with two decimals.
    """
    uncorrected = float('%0.2f' % mt.Tm_NN(seq1, Na=saltConc))
    corrected = mt.chem_correction(uncorrected, fmd=formConc)
    return '%0.2f' % corrected
Esempio n. 15
0
def main(argv):
    """Extract probe sequences for every region of a BED file.

    Command-line options ``--FA`` (reference FASTA) and ``--BED`` (target
    regions) are parsed with argparse from the process arguments; ``argv``
    itself is not used. For each BED region:

    * regions of at most 120 bp are fetched whole, passed to ``faout`` and
      written to ``./test.fa`` with repeat, GC, N-rate and Tm statistics;
    * larger regions are tiled with a window that starts at every position
      and ends 119 bases later, each window being passed to ``faout``.

    Blank lines in the BED file are skipped and only its first three columns
    are read.
    """
    fafile = '/Users/yeweijian/Downloads/data/hg19.fa'
    bedfile = '/Users/yeweijian/Downloads/data/test.bed'

    parser = argparse.ArgumentParser(description='python Rundesign.py ')
    parser.add_argument('--FA',
                        type=str,
                        default=fafile,
                        help='the reference fasta file')
    parser.add_argument('--BED',
                        type=str,
                        default=bedfile,
                        help='the target region file')
    args = parser.parse_args()

    fafile = args.FA
    bedfile = args.BED

    file_exists(fafile)
    file_exists(bedfile)

    # Read the FASTA file
    fh = pysam.Fastafile(fafile)
    try:
        # Context managers make sure the output and BED handles are closed.
        with open("./test.fa", 'w+') as fout, open(bedfile) as bed:
            # Read the region file and extract the sequence of each region
            for line in bed:
                if not line.strip():
                    continue
                chrom, start, end = line.rstrip().split('\t')[:3]
                start = int(start)
                end = int(end)
                regionsize = end - start

                # Region is small: take the sequence of the whole region
                if regionsize <= 120:
                    seq = Seq(fh.fetch(reference=chrom, start=start, end=end),
                              IUPAC.unambiguous_dna)
                    faout(seq, chrom, start, end)

                    print(
                        '>{}:{}-{} Repeat:{:.3f} GC:{:.3f} Nrate:{:.3f} Tm:{}'.format(
                            chrom, start, end, repeatstat(seq), GC(seq),
                            nstat(seq), mt.Tm_NN(seq)),
                        file=fout)
                    print(seq, file=fout)
                    continue

                for p1 in range(start, end):
                    # The window end must follow the window start; it was
                    # previously pinned to the region start, so the window
                    # never moved and the stop condition never fired.
                    p2 = p1 + 119
                    if p2 >= end:
                        break
                    seq = Seq(fh.fetch(reference=chrom, start=p1, end=p2),
                              IUPAC.unambiguous_dna)
                    faout(seq, chrom, p1, p2)
    finally:
        fh.close()
Esempio n. 16
0
 def __probeTm(self):
     """
     Return the Tm of ``self.seq`` under the configured salt and formamide
     conditions, as a string with two decimals.

     The nearest-neighbor Tm at ``Na=self.sal`` is corrected with
     ``mt.chem_correction`` using ``int(self.form)`` as ``fmd``.
     """
     uncorrected = float(mt.Tm_NN(self.seq, Na=self.sal))
     corrected = mt.chem_correction(uncorrected, fmd=int(self.form))
     return '%0.2f' % corrected
Esempio n. 17
0
def PenaltyMeltingTemperature(Primer: str):
    """Return the melting-temperature penalty of ``Primer``.

    The nearest-neighbor Tm is passed to ``ValueInBounds`` together with the
    constants 55 and 3.
    """
    return ValueInBounds(mt.Tm_NN(Primer), 55, 3)
Esempio n. 18
0
def cross_hybrid_score_worker(primer_1, primer_2, tm_params):
    """Score how strongly two primers can hybridize position by position.

    The primers are compared base by base (up to the shorter length). A
    position counts as a match when the two bases are complementary
    (C/G or A/T).

    Returns ``(score, max_comp, longest_run)``:

    * ``score`` - nearest-neighbor Tm of ``primer_1`` against ``primer_2``
      as ``c_seq`` (with ``tm_params``); when Biopython raises ``ValueError``
      because thermodynamic data is missing, a rough sum of per-position
      scores over the region between the first and last match;
    * ``max_comp`` - number of matching positions;
    * ``longest_run`` - length of the longest run of consecutive matches.

    ``(0, 0, 0)`` is returned when no position matches.
    """
    # Rough per-position scores, used only for the fallback estimate.
    score_dict = {
        ('G', 'C'): 4,
        ('A', 'T'): 2,
        ('C', 'G'): 4,
        ('C', 'A'): -0.6,
        ('T', ): -0.6,
        ('A', ): -0.6,
        ('G', ): -0.6,
        ('C', ): -0.6,
        ('A', 'C'): -0.6,
        ('C', 'T'): -0.6,
        ('A', 'G'): -0.4,
        ('G', 'A'): -0.4,
        ('G', 'T'): -0.4,
        ('T', 'G'): -0.4,
        ('T', 'A'): 2,
        ('T', 'C'): -0.6
    }
    complementary_pairs = [('C', 'G'), ('A', 'T'), ('T', 'A'), ('G', 'C')]

    matches = [
        1 if tuple(set(pair)) in complementary_pairs else 0
        for pair in zip(primer_1, primer_2)
    ]
    # Number of bases that align
    max_comp = sum(matches)
    if not max_comp:
        # No hybridization
        return 0, 0, 0

    longest_run = 0
    for is_match, group in it.groupby(matches):
        if is_match:
            longest_run = max(longest_run, len(list(group)))

    # Region from the first to the last matching position. The end is an
    # absolute index: a negative "-stop" slice would be empty whenever the
    # last aligned position is itself a match (stop == 0).
    start = matches.index(1)
    end = len(matches) - matches[::-1].index(1)
    p1 = primer_1[start:end]
    p2 = primer_2[start:end]

    try:
        score = mt.Tm_NN(primer_1, c_seq=primer_2, **tm_params)
    except ValueError:
        # super rough approximation when thermodynamic data is not available
        score = sum(score_dict[tuple(set(pair))] for pair in zip(p1, p2))
    return score, max_comp, longest_run
Esempio n. 19
0
 def createPrimer(INP):
     """Build the ordered result record for one primer.

     ``INP`` is an ``(index, primer)`` pair. The record holds the primer
     label, its sequence from ``PrimerPair``, the GC-content, GC-extremity
     and melting-temperature penalties, and the nearest-neighbor Tm.
     ``PrimerIdentifier``, ``IDs`` and ``PrimerPair`` come from the enclosing
     scope.
     """
     (i, Primer) = INP
     record = OrderedDict()
     record["Primer"] = "%s %s" % (PrimerIdentifier, IDs[i])
     record["Sequence"] = PrimerPair[i]
     record["GCContent"] = PenaltyGCContent(Primer)
     record["GCExtremities"] = PenaltyGCExtremities(Primer)
     record["Tm p"] = PenaltyMeltingTemperature(Primer)
     record["Tm"] = mt.Tm_NN(Primer)
     return record
Esempio n. 20
0
def __featurize(seq):
    '''
    Encode one sequence as a single-row feature matrix.

    Positions -9..21 are mapped to indices by adding 19, so the window used
    is ``seq[10:41]`` (up to the N in NGG per the original note). The window
    is encoded as one-hot features, dinucleotide features, base counts
    (A, C, G, T, G+C) and four nearest-neighbor melting temperatures.

    Returns a numpy array of shape ``(1, n_features)``.
    '''
    index_offset = 19
    window_start = -9 + index_offset
    window_stop = 21 + index_offset + 1
    seq = seq[window_start:window_stop]

    vector = []

    # One hot encoding
    vector.extend(__one_hot_encode(seq))

    # Dinucleotides
    vector.extend(__dinucleotide_encode(seq))

    # Sum nucleotides
    base_counts = [seq.count(base) for base in ('A', 'C', 'G', 'T')]
    base_counts.append(seq.count('G') + seq.count('C'))
    vector.extend(base_counts)

    # Melting temp: whole window plus three sub-windows counted from the end
    from Bio.SeqUtils import MeltingTemp as mt
    tm_windows = (seq, seq[-5:], seq[-13:-5], seq[-21:-13])
    vector.extend([mt.Tm_NN(window) for window in tm_windows])

    return np.array(vector).reshape(1, -1)
Esempio n. 21
0
def mytm(seq):
    """Return the nearest-neighbor Tm of ``seq`` under fixed conditions.

    The conditions passed to ``mt.Tm_NN`` are hard-coded below.
    """
    fixed_conditions = dict(
        dnac1=500,
        dnac2=0,
        selfcomp=False,
        Na=0,
        K=50,
        Tris=25,
        Mg=2,
        dNTPs=0.2,
        saltcorr=5,
    )
    return mt.Tm_NN(seq, **fixed_conditions)
Esempio n. 22
0
    def melting_temperature(primer: str) -> float:
        """Calculate the melting temperature of a nucleotide sequence.

        Returns
        -------
        The nearest-neighbor melting temperature rounded to two decimals,
        or -1 when the calculated value is negative.
        """
        calculated = MT.Tm_NN(primer)
        # Negative temperatures are collapsed to the sentinel -1.
        return round(-1 if calculated < 0 else calculated, 2)
Esempio n. 23
0
    def _is_melting_temp_pass(self, seq, tm_low, tm_high):
        """
        Tell whether the nearest-neighbor Tm of ``seq`` (computed with
        dnac1=250.0, dnac2=0.0, saltcorr=7) lies within
        ``[tm_low, tm_high]``, both bounds included.

        >>> synth_obj._is_melting_temp_pass(seq='AAAAAAAAAATTTTTTTTTT', tm_low=36.0, tm_high=73.0)
        False
        >>> synth_obj._is_melting_temp_pass(seq='GCGCGCGCGCGCATATATAT', tm_low=36.0, tm_high=73.0)
        True
        """
        seq_tm = MeltingTemp.Tm_NN(seq, dnac1=250.0, dnac2=0.0, saltcorr=7)
        return bool(tm_low <= seq_tm <= tm_high)
Esempio n. 24
0
def Feature_Extraction(lines):
    """Turn whitespace-separated records into a 689-column feature matrix.

    Each line must hold a numeric target in its first field and a sequence
    of at least 34 bases in its second field. The first 34 bases are
    one-hot encoded (A, C, G, T; case-insensitive) and expanded into:

    * columns 0-19: position-independent nucleotide and dinucleotide
      features (4 + 16),
    * columns 20-683: position-dependent nucleotide and dinucleotide
      features (4*34 + 16*33),
    * column 684: nearest-neighbor melting temperature of the sequence,
    * columns 685-687: G/C count and two indicator flags (count <= 9 or
      count > 9),
    * column 688: second element of ``RNA.fold`` on the sequence.

    Returns ``(DATA_X_FE, DATA_Y)``: the feature matrix and the targets.
    """
    data_n = len(lines)
    DATA_X = zeros((data_n, 34, 4), dtype=int)
    DATA_Y = zeros((data_n, ), dtype=float)
    SEQ = []

    for l in range(data_n):
        data = lines[l].split()
        seq = data[1]
        SEQ.append(seq)
        for i in range(34):
            if seq[i] in "Aa":
                DATA_X[l, i, 0] = 1
            elif seq[i] in "Cc":
                DATA_X[l, i, 1] = 1
            elif seq[i] in "Gg":
                DATA_X[l, i, 2] = 1
            elif seq[i] in "Tt":
                DATA_X[l, i, 3] = 1
        DATA_Y[l] = float(data[0])

    # Feature Extraction
    # NOTE: every index below uses floor division / divmod; true division
    # would yield floats on Python 3, which are not valid array indices.
    DATA_X_FE = zeros((data_n, 689), dtype=float)
    for l in range(data_n):
        # position-independent nucleotides and dinucleotides (4 + 4^2 = 20)
        for i in range(4):
            DATA_X_FE[l, i] = sum(DATA_X[l, :, i])
        for i in range(4, 20):
            first_base, second_base = divmod(i - 4, 4)
            DATA_X_FE[l, i] = Dinucleotide_FE(DATA_X[l], first_base,
                                              second_base)

        # position-dependent nucleotides and dinucleotides ( 4*34 + (4^2 * 33) = 664)
        for i in range(20, 156):
            position, base = divmod(i - 20, 4)
            DATA_X_FE[l, i] = DATA_X[l, position, base]
        for i in range(156, 684):
            position, pair_code = divmod(i - 156, 16)
            first_base, second_base = divmod(pair_code, 4)
            DATA_X_FE[l, i] = Dinucleotide_FE(
                DATA_X[l, position:position + 2, :],
                first_base, second_base)

        # Melting temperature (1)
        DATA_X_FE[l, 684] = mt.Tm_NN(SEQ[l])

        # GC count (3)
        DATA_X_FE[l, 685] = SEQ[l].count("G") + SEQ[l].count(
            "g") + SEQ[l].count("C") + SEQ[l].count("c")
        if DATA_X_FE[l, 685] <= 9:
            DATA_X_FE[l, 686] = 1
            DATA_X_FE[l, 687] = 0
        else:
            DATA_X_FE[l, 686] = 0
            DATA_X_FE[l, 687] = 1

        # Free energy
        DATA_X_FE[l, 688] = RNA.fold(SEQ[l])[1]

    return DATA_X_FE, DATA_Y
Esempio n. 25
0
 def calc_tm_value(self):
     """Compute the mean Tm of the primer pair with three methods.

     The forward and reverse primers are upper-cased and wrapped in ``Seq``;
     for each method (Wallace, GC, nearest-neighbor) the two primer values
     are averaged. Results are stored on the instance, together with a
     summary table (``self.tm_list`` / ``self.tm_table``) rounded to one
     decimal.
     """
     self.primer_fw_seq = Seq(self.primer_fw.upper())
     self.primer_rv_seq = Seq(self.primer_rv.upper())
     primer_pair = (self.primer_fw_seq, self.primer_rv_seq)

     # Wallace rule
     self.tm_value_Wallace = np.mean(
         [mt.Tm_Wallace(primer) for primer in primer_pair])
     # GC method
     self.tm_value_GC = np.mean(
         [mt.Tm_GC(primer, Na=50, valueset=7) for primer in primer_pair])
     # Nearest-neighbor method
     self.tm_value_NN = np.mean(
         [mt.Tm_NN(primer, Na=50, nn_table=mt.DNA_NN1)
          for primer in primer_pair])

     self.tm_table_column = ["計算手法", "Tm値 (°C)"]
     self.tm_list = [
         ["Wallace法", round(self.tm_value_Wallace, 1)],
         ["GC法", round(self.tm_value_GC, 1)],
         ["最近接塩基法", round(self.tm_value_NN, 1)],
     ]
     self.tm_table = pd.DataFrame(self.tm_list,
                                  columns=self.tm_table_column)
Esempio n. 26
0
def calculate_tm(df, Na=390, dnac1_oligo=5):
    """Return the nearest-neighbor Tm of every ``qseq`` in ``df``.

    Tm_NN is called with ``saltcorr=7``, ``dnac1 = dnac1_oligo * 15`` and
    ``dnac2 = 1``. The result is a numpy array with one value per row.
    """
    queries = df.qseq.values
    subjects = df.sseq.values
    tm_array = np.zeros(len(queries))
    for row, qseq in enumerate(queries):
        # NOTE(review): the complement of sseq is computed but never passed
        # to Tm_NN (e.g. as c_seq) - confirm whether that was intended.
        cseq = Seq(subjects[row]).complement()
        tm_array[row] = mt.Tm_NN(qseq,
                                 Na=Na,
                                 saltcorr=7,
                                 dnac1=dnac1_oligo * 15,
                                 dnac2=1)
    return (tm_array)
Esempio n. 27
0
 def __init__(self, seq, position, specificity=None):
     """Store a sequence with its position, length, GC fraction and Tm.

     ``seq`` is stored upper-cased. The GC fraction is the number of G and C
     bases divided by the sequence length. The Tm is 0 when the sequence
     contains 'X'; otherwise it is the nearest-neighbor Tm computed with
     Na=300, dnac1=5, dnac2=1 and saltcorr=7. ``specificity`` is stored
     as given (``None`` by default).
     """
     # Upper-case once and reuse it for the counts.
     self.seq = seq.upper()
     self.position = position
     self.length = len(seq)
     self.gcFrac = (self.seq.count('G') + self.seq.count('C')) / len(seq)
     if 'X' in self.seq:
         self.tm = 0
     else:
         self.tm = mt.Tm_NN(Seq(seq), Na=300, dnac1=5, dnac2=1, saltcorr=7)
     # Both branches of the former "== None" test stored the argument as is.
     self.specificity = specificity
Esempio n. 28
0
def generate_primer(sequence,
                    prefix='',
                    target_tm=60,
                    primer=50,
                    Na=50,
                    K=None,
                    Mg=None,
                    dNTPs=None,
                    Tris=None):
    """
    Create an amplification primer for a given sequence.

    The shortest prefix of ``sequence`` whose nearest-neighbor Tm reaches
    ``target_tm`` is selected (DNA_NN2 table, saltcorr=5, half of ``primer``
    as the concentration of each strand).

    :param sequence: DNA string (only A, T, C, G; case-insensitive)
    :param prefix: optional DNA string prepended to the selected primer
    :param target_tm: minimum Tm the primer must reach
    :param primer: total primer concentration in nM
    :param Na: Na concentration in mM
    :param K: K concentration in mM (``None`` means 0)
    :param Mg: Mg concentration in mM (``None`` means 0)
    :param dNTPs: dNTP concentration (``None`` means 0)
    :param Tris: Tris concentration in mM (``None`` means 0)
    :return: ``prefix`` plus the selected primer, or ``None`` (after
        printing a message) when no prefix reaches ``target_tm``
    :raises ValueError: if ``sequence`` or a non-empty ``prefix`` is not
        valid DNA
    """

    # Make sure sequence is DNA (ATCG)
    if re.fullmatch('[ATCG]+', sequence.upper()) is None:
        raise ValueError('Input sequence is not valid DNA!')
    if prefix != '' and re.fullmatch('[ATCG]+', prefix.upper()) is None:
        raise ValueError('Input prefix is not valid DNA!')

    target_primer = None

    # Start at length 1: an empty prefix is not a primer and a melting
    # temperature is not defined for it.
    for primer_length in range(1, len(sequence) + 1):
        candidate = sequence[:primer_length]
        primer_TM = mt.Tm_NN(
            Seq(candidate),
            nn_table=mt.DNA_NN2,
            dnac1=primer / 2,  # nM Primers / 2
            dnac2=primer / 2,  # nM Primers / 2
            selfcomp=False,
            Na=Na,  # mM
            K=K or 0,  # mM
            Tris=Tris or 0,  # mM
            Mg=Mg or 0,  # mM
            dNTPs=dNTPs or 0,
            saltcorr=5)
        if primer_TM >= target_tm:
            target_primer = candidate
            break

    if target_primer is None:
        print(
            'A primer with the desired TM could not be generated for the input sequence!'
        )
        return None

    return prefix + target_primer
Esempio n. 29
0
def get_Tm(seq):
	"""Calculate the melting temperature of a DNA oligomer.

	The conditions are hard-coded; per the original note they are based on
	the Primestar PCR Premix condition.

	Args:
		seq: string of DNA sequence

	Returns:
		Tm: melting temperature (Celsius)
	"""
	# note: all concentrations are in mM except dnac1, which uses nM
	premix_conditions = dict(dnac1=250, Na=0, K=10, Tris=100, Mg=2, dNTPs=0.8)
	return mt.Tm_NN(Seq(seq), **premix_conditions)
Esempio n. 30
0
 def _create_staple_max_melt_T(self) -> Dict[Strand, float]:
     """Map each staple to the highest melting temperature of its domains.

     Domains whose sequence contains 'N' are ignored; a staple with no
     remaining domain does not appear in the result.
     """
     melt_t_by_staple: Dict[Strand, List[float]] = dict()
     for staple in self.staples:
         for domain in staple.domain_list:
             if "N" in domain.sequence:
                 continue
             domain_seq = Seq(domain.sequence)
             # NOTE: nearest neighbor for domains longer than 14 bases,
             #   the 'Wallace rule' otherwise
             if len(domain.base_list) > 14:
                 melt_t = MeltingTemp.Tm_NN(domain_seq, Na=0, Mg=17.5)
             else:
                 melt_t = MeltingTemp.Tm_Wallace(domain_seq)
             melt_t_by_staple.setdefault(staple, []).append(melt_t)
     return {
         staple: max(temperatures)
         for staple, temperatures in melt_t_by_staple.items()
     }