Ejemplo n.º 1
0
def optimal_alignment_gaff_yesimon(s, t, gap_init=-11, gap_ext=-1):
    """Fill an affine-gap global alignment table for ``s`` and ``t``.

    Substitution scores come from ``blosum62`` (tried in both key orders).
    The first symbol of a gap costs ``gap_init`` and every further symbol of
    the same gap costs ``gap_ext``; both are added to the score, so they are
    expected to be negative.

    If ``s`` is shorter than ``t`` the arguments are swapped before the table
    is filled, so the longer sequence always indexes the rows.

    Args:
        s: first sequence.
        t: second sequence.
        gap_init: score added when a gap is opened.
        gap_ext: score added for each extension of an open gap.

    Returns:
        The ``(score, predecessor_cell)`` tuple stored for the bottom-right
        cell of the table; ``predecessor_cell`` is ``None`` only when both
        sequences are empty.

    Side effects:
        Prints the sequence lengths, the table row by row and the raw table
        dict to stdout (debug output kept from the original implementation).
    """
    sl, tl = len(s), len(t)
    if sl < tl:
        return optimal_alignment_gaff_yesimon(t, s, gap_init, gap_ext)

    # m[(i, j)] -> (best score of s[:i] vs t[:j], cell that score came from)
    m = {(0, 0): (0, None)}
    for i in range(1, sl + 1):
        m[(i, 0)] = (gap_init + gap_ext * (i - 1), (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (gap_init + gap_ext * (j - 1), (0, j - 1))

    # g / h hold the best score of an alignment ending in a gap that was
    # reached from the cell above / from the cell to the left.
    g = {}
    h = {}
    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))
        diag = m[(i - 1, j - 1)][0] + cost

        # Either open a fresh gap from m or extend an already open one.
        # The explicit membership test replaces max(value, None), which only
        # works on Python 2.
        up = m[(i - 1, j)][0] + gap_init
        if (i - 1, j) in g:
            up = max(up, g[(i - 1, j)] + gap_ext)
        g[(i, j)] = up

        left = m[(i, j - 1)][0] + gap_init
        if (i, j - 1) in h:
            left = max(left, h[(i, j - 1)] + gap_ext)
        h[(i, j)] = left

        # Ties are resolved in the order diagonal, up, left.
        v = max(diag, up, left)
        if v == diag:
            m[(i, j)] = (v, (i - 1, j - 1))
        elif v == up:
            m[(i, j)] = (v, (i - 1, j))
        else:
            m[(i, j)] = (v, (i, j - 1))

    # Index the final cell explicitly: the loop variables are undefined when
    # one of the sequences is empty.
    retval = m[(sl, tl)]

    # Debug dump; written so it prints the same text on Python 2 and 3.
    print('%d %d' % (sl, tl))
    for i in range(sl + 1):
        print(' '.join(str(m[(i, j)]) if (i, j) in m else '-'
                       for j in range(tl + 1)))
    print(m)
    return retval
Ejemplo n.º 2
0
def maxAlignscorgMat(s, t, gap_penalty=-5):
    """Return the best global alignment score of ``s`` and ``t``.

    Uses BLOSUM62 substitution scores (``blosum62`` is tried in both key
    orders) and a linear gap penalty: every gap symbol adds ``gap_penalty``.

    Args:
        s: first sequence.
        t: second sequence.
        gap_penalty: score added per gap symbol (negative; default -5).

    Returns:
        The score stored in the bottom-right cell of the alignment table.
    """
    string_s = len(s)
    string_t = len(t)

    # AlnScore[(i, j)] -> (best score of s[:i] vs t[:j], predecessor cell)
    AlnScore = {(0, 0): (0, None)}
    for i in range(1, string_s + 1):
        AlnScore[(i, 0)] = (i * gap_penalty, (i - 1, 0))
    for j in range(1, string_t + 1):
        AlnScore[(0, j)] = (j * gap_penalty, (0, j - 1))

    for i, j in product(range(1, string_s + 1), range(1, string_t + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))
        diag = AlnScore[(i - 1, j - 1)][0] + cost
        up = AlnScore[(i - 1, j)][0] + gap_penalty
        left = AlnScore[(i, j - 1)][0] + gap_penalty
        max_align = max(diag, up, left)

        # Ties are resolved in the order diagonal, up, left.
        if diag == max_align:
            AlnScore[(i, j)] = (max_align, (i - 1, j - 1))
        elif up == max_align:
            AlnScore[(i, j)] = (max_align, (i - 1, j))
        else:
            AlnScore[(i, j)] = (max_align, (i, j - 1))

    # Use the lengths, not the loop variables: those are undefined when one
    # of the sequences is empty.
    return AlnScore[(string_s, string_t)][0]
Ejemplo n.º 3
0
def globAlign(s, t, gap_penalty=-5):
    """Return the best global alignment score under a constant gap penalty.

    Substitutions are scored with ``blosum62`` (tried in both key orders).
    Opening a gap adds ``gap_penalty`` once; extending an already open gap
    adds nothing.

    Args:
        s: first sequence.
        t: second sequence.
        gap_penalty: score added once per gap (negative; default -5).

    Returns:
        The score stored in the bottom-right cell of the alignment table.
    """
    protein_s = len(s)
    protein_t = len(t)

    # maximumAlnScore[(i, j)] -> (best score, predecessor cell)
    maximumAlnScore = {(0, 0): (0, None)}
    for i in range(1, protein_s + 1):
        maximumAlnScore[(i, 0)] = (gap_penalty, (i - 1, 0))
    for j in range(1, protein_t + 1):
        maximumAlnScore[(0, j)] = (gap_penalty, (0, j - 1))

    # b / c: best score of an alignment ending in a gap entered from the
    # cell above / from the cell to the left.
    b, c = {}, {}
    for i, j in product(range(1, protein_s + 1), range(1, protein_t + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))
        diag = maximumAlnScore[(i - 1, j - 1)][0] + cost

        # Open a new gap, or carry an existing one forward for free.  The
        # explicit None check replaces max(value, None), which raises
        # TypeError on Python 3.
        up = maximumAlnScore[(i - 1, j)][0] + gap_penalty
        carried = b.get((i - 1, j))
        if carried is not None:
            up = max(up, carried)
        b[(i, j)] = up

        left = maximumAlnScore[(i, j - 1)][0] + gap_penalty
        carried = c.get((i, j - 1))
        if carried is not None:
            left = max(left, carried)
        c[(i, j)] = left

        # Ties are resolved in the order diagonal, up, left.
        score = max(diag, up, left)
        if score == diag:
            maximumAlnScore[(i, j)] = (score, (i - 1, j - 1))
        elif score == up:
            maximumAlnScore[(i, j)] = (score, (i - 1, j))
        else:
            maximumAlnScore[(i, j)] = (score, (i, j - 1))

    # Index by the lengths: the loop variables do not exist for empty input.
    return maximumAlnScore[(protein_s, protein_t)][0]
Ejemplo n.º 4
0
def calcBLOSUMfeatures(SAV_coords):
    """Build a structured array of BLOSUM62 scores, one per variant.

    Each item of ``SAV_coords`` is indexed with ``'aa_wt'`` and ``'aa_mut'``;
    the score for that pair is looked up in ``blosum62``, falling back to the
    swapped key order when the direct key is absent.

    Returns:
        A NumPy array of length ``len(SAV_coords)`` with a single float
        field named ``'BLOSUM'``.
    """
    features = np.zeros(len(SAV_coords), dtype=np.dtype([('BLOSUM', 'f')]))
    for idx, variant in enumerate(SAV_coords):
        wild_type = variant['aa_wt']
        mutant = variant['aa_mut']
        swapped_score = blosum62.get((mutant, wild_type))
        features[idx] = blosum62.get((wild_type, mutant), swapped_score)
    return features
def score(A, B):
    """Sum the BLOSUM62 scores of ``A`` and ``B`` compared position by position.

    Positions ``0 .. len(A) - 1`` are visited, so ``B`` must be at least as
    long as ``A``.
    """
    total = 0
    for pos in range(len(A)):
        left, right = A[pos], B[pos]
        # Try (left, right) and fall back to (right, left): the blosum table
        # is stored as a triangular matrix only.
        swapped = blosum62.get((right, left))
        total += blosum62.get((left, right), swapped)
    return total
Ejemplo n.º 6
0
def get_score(s, t, gap_penalty=-5):
    """Return the best global alignment score under a constant gap penalty.

    Substitution scores are read from ``blosum62`` (both key orders are
    tried).  A gap adds ``gap_penalty`` once when it is opened and nothing
    when it is extended.

    Args:
        s: first sequence.
        t: second sequence.
        gap_penalty: score added once per gap (negative; default -5).

    Returns:
        The score of the bottom-right cell of the alignment table.
    """
    sl, tl = len(s), len(t)

    # m[(i, j)] -> (best score of s[:i] vs t[:j], predecessor cell)
    m = {(0, 0): (0, None)}
    for i in range(1, sl + 1):
        m[(i, 0)] = (gap_penalty, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (gap_penalty, (0, j - 1))

    # g / h: best score ending in a gap entered from above / from the left.
    g, h = {}, {}
    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))
        diag = m[(i - 1, j - 1)][0] + cost

        # max(value, None) is a TypeError on Python 3, so a missing
        # predecessor gap is handled explicitly.
        up = m[(i - 1, j)][0] + gap_penalty
        if (i - 1, j) in g:
            up = max(up, g[(i - 1, j)])
        g[(i, j)] = up

        left = m[(i, j - 1)][0] + gap_penalty
        if (i, j - 1) in h:
            left = max(left, h[(i, j - 1)])
        h[(i, j)] = left

        # Ties are resolved in the order diagonal, up, left.
        v = max(diag, up, left)
        if v == diag:
            m[(i, j)] = (v, (i - 1, j - 1))
        elif v == up:
            m[(i, j)] = (v, (i - 1, j))
        else:
            m[(i, j)] = (v, (i, j - 1))

    # The loop variables are undefined for empty input; index by length.
    return m[(sl, tl)][0]
Ejemplo n.º 7
0
def get_max_alignment(s, t, gap_penalty=-5):
    """Return the maximum global alignment score of ``s`` and ``t``.

    Scoring uses ``blosum62`` for substitutions (both key orders are tried)
    and a linear gap penalty of ``gap_penalty`` per gap symbol.

    Args:
        s: first sequence.
        t: second sequence.
        gap_penalty: score added per gap symbol (negative; default -5).

    Returns:
        The score of the bottom-right cell of the alignment table.
    """
    sl, tl = len(s), len(t)

    # m[(i, j)] -> (best score of s[:i] vs t[:j], predecessor cell)
    m = {(0, 0): (0, None)}
    for i in range(1, sl + 1):
        m[(i, 0)] = (i * gap_penalty, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (j * gap_penalty, (0, j - 1))

    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))

        # Candidate scores paired with the cell they come from, listed in
        # tie-break order: diagonal, up, left.
        candidates = (
            (m[(i - 1, j - 1)][0] + cost, (i - 1, j - 1)),
            (m[(i - 1, j)][0] + gap_penalty, (i - 1, j)),
            (m[(i, j - 1)][0] + gap_penalty, (i, j - 1)),
        )
        best = max(value for value, _ in candidates)
        for value, origin in candidates:
            if value == best:
                m[(i, j)] = (best, origin)
                break

    # Index by the lengths: the loop variables do not exist for empty input.
    return m[(sl, tl)][0]
Ejemplo n.º 8
0
def get_score(s, t, gap_penalty=-5):
    """Score the best global alignment of ``s`` and ``t`` (constant gap cost).

    ``blosum62`` supplies the substitution scores (looked up in both key
    orders).  Each gap, whatever its length, adds ``gap_penalty`` exactly
    once: opening costs ``gap_penalty`` and extending is free.

    Args:
        s: first sequence.
        t: second sequence.
        gap_penalty: score added once per gap (negative; default -5).

    Returns:
        The score held by the bottom-right cell of the alignment table.
    """
    sl, tl = len(s), len(t)

    # m[(i, j)] -> (best score of s[:i] vs t[:j], predecessor cell)
    m = {(0, 0): (0, None)}
    for i in range(1, sl + 1):
        m[(i, 0)] = (gap_penalty, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (gap_penalty, (0, j - 1))

    # g / h: best score ending in a gap entered from above / from the left.
    g, h = {}, {}
    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))
        diag = m[(i - 1, j - 1)][0] + cost

        # A gap that is already open is carried over unchanged; when there
        # is none, only "open a new gap" is possible.  Comparing against
        # None (as max(value, None) does) fails on Python 3.
        open_up = m[(i - 1, j)][0] + gap_penalty
        prev_up = g.get((i - 1, j))
        g[(i, j)] = open_up if prev_up is None else max(open_up, prev_up)

        open_left = m[(i, j - 1)][0] + gap_penalty
        prev_left = h.get((i, j - 1))
        h[(i, j)] = open_left if prev_left is None else max(open_left, prev_left)

        # Ties are resolved in the order diagonal, up, left.
        v = max(diag, g[(i, j)], h[(i, j)])
        if v == diag:
            m[(i, j)] = (v, (i - 1, j - 1))
        elif v == g[(i, j)]:
            m[(i, j)] = (v, (i - 1, j))
        else:
            m[(i, j)] = (v, (i, j - 1))

    # The loop variables are undefined for empty input; index by length.
    return m[(sl, tl)][0]
Ejemplo n.º 9
0
def get_max_alignment(s, t, gap_penalty=-5):
    """Return the maximum global alignment score of ``s`` and ``t``.

    Substitutions are scored with ``blosum62`` (both key orders are tried);
    every gap symbol adds ``gap_penalty`` (linear gap cost).

    Args:
        s: first sequence.
        t: second sequence.
        gap_penalty: score added per gap symbol (negative; default -5).

    Returns:
        The score of the bottom-right cell of the alignment table.
    """
    sl, tl = len(s), len(t)

    # m[(i, j)] -> (best score of s[:i] vs t[:j], predecessor cell)
    m = {(0, 0): (0, None)}
    for i in range(1, sl + 1):
        m[(i, 0)] = (i * gap_penalty, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (j * gap_penalty, (0, j - 1))

    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))
        d = m[(i - 1, j - 1)][0] + cost
        l = m[(i - 1, j)][0] + gap_penalty
        u = m[(i, j - 1)][0] + gap_penalty
        b = max(d, l, u)

        # Ties are resolved in the order d, l, u.
        if d == b:
            m[(i, j)] = (b, (i - 1, j - 1))
        elif l == b:
            m[(i, j)] = (b, (i - 1, j))
        else:
            m[(i, j)] = (b, (i, j - 1))

    # Index by the lengths: the loop variables do not exist for empty input.
    return m[(sl, tl)][0]
Ejemplo n.º 10
0
 def match_callback(c1, c2):
     """Return the substitution score for the symbols ``c1`` and ``c2``.

     ``blosum62`` is consulted for ``(c1, c2)`` and then for ``(c2, c1)``;
     the table appears to store each unordered pair under one key order
     only, so a single-order lookup would miss valid pairs.  When neither
     key exists the score falls back to 1 for identical symbols and -4
     otherwise.
     """
     score = blosum62.get((c1, c2))
     if score is None:
         score = blosum62.get((c2, c1))
     if score is None:
         score = 1 if c1 == c2 else -4
     return score
Ejemplo n.º 11
0
# Three-table affine-gap global alignment of s1 against s2.
#   m[i][j]  - best score with s1[i-1] aligned to s2[j-1]
#   ix[i][j] - best score ending with s1[i-1] aligned to a gap
#   iy[i][j] - best score ending with s2[j-1] aligned to a gap
# `m`, `ix`, `s1`, `s2`, `a`, `b` and `blosum62` must already be defined
# before this point.  From the boundary rows, `b` is the cost of the first
# gap symbol and `a` the cost of each further one.
iy = [[0] * (len(s2) + 1) for _ in range(len(s1) + 1)]

# -100 marks boundary states that cannot occur.
# NOTE(review): this only works while real scores stay above -100; confirm
# the expected input sizes or switch to float('-inf').
for i in range(1, len(s1) + 1):
    m[i][0] = -100
    ix[i][0] = -b - (i - 1) * a
    iy[i][0] = -100
for j in range(1, len(s2) + 1):
    m[0][j] = -100
    ix[0][j] = -100
    iy[0][j] = -b - (j - 1) * a

l1, l2 = len(s1), len(s2)

for i in range(1, l1 + 1):
    for j in range(1, l2 + 1):
        sc = blosum62.get((s1[i - 1], s2[j - 1]))
        if sc is None:
            sc = blosum62.get((s2[j - 1], s1[i - 1]))
        m[i][j] = max(m[i - 1][j - 1], ix[i - 1][j - 1],
                      iy[i - 1][j - 1]) + sc
        ix[i][j] = max(m[i - 1][j] - b, ix[i - 1][j] - a, iy[i - 1][j] - b)
        # iy consumes a symbol of s2 only, so every predecessor is the cell
        # to the left (i, j - 1); the ix term previously read (i - 1, j).
        iy[i][j] = max(m[i][j - 1] - b, ix[i][j - 1] - b, iy[i][j - 1] - a)

print(max(m[l1][l2], ix[l1][l2], iy[l1][l2]))


def traceback(dist, s1, s2):
    # Walks the 2-D table `dist` backwards starting at cell
    # (len(s1), len(s2)) until cell (0, 0) is reached.
    # NOTE(review): the rest of the loop body is not present in this
    # excerpt, so what is accumulated in t1 / t2 cannot be confirmed here.
    t1, t2, i, j = '', '', len(s1), len(s2)
    while not (i == 0 and j == 0):
        # l, d, t: table values of the left, diagonal and top neighbours.
        l, d, t = dist[i][j - 1], dist[i - 1][j - 1], dist[i - 1][j]
        # Taken when the current symbols are equal, or when the diagonal
        # neighbour is strictly larger than both other neighbours.
        if s1[i - 1] == s2[j - 1] or (d == max(l, d, t) and d > l and d > t):
Ejemplo n.º 12
0
def create_figures(feature_to_weighted_sums, weight_totals, min_total,
                   report_dir, filetype):
    """Plot amino-acid feature correlations next to BLOSUM62 and compare them.

    For every feature whose name matches ``aa_to_<LETTER>`` a feature vector
    is built from ``weighted_sums / weight_totals``, keeping only positions
    where ``weight_totals >= min_total``.  Two heatmaps are written to
    ``report_dir``: the pairwise Pearson correlation of those vectors
    (``aa_corr_to.<filetype>``) and the matching BLOSUM62 scores
    (``blosum62.<filetype>``).  Finally the Pearson correlation between the
    two lower triangles is printed.

    Args:
        feature_to_weighted_sums: mapping of feature name to an array of
            weighted sums (presumably the same shape as ``weight_totals`` —
            TODO confirm against the caller).
        weight_totals: array of total weights used as the divisor.
        min_total: minimum total weight for a position to be included.
        report_dir: output directory; must support the ``/`` operator
            (e.g. ``pathlib.Path``).
        filetype: output format passed to ``plt.savefig`` and also used as
            the file extension, so name and content always agree.
    """
    aa_blosum = set()
    for aa1, aa2 in blosum62:
        aa_blosum.update((aa1, aa2))

    include_mask = weight_totals >= min_total

    pattern = re.compile(r'aa_to_([A-Z])$')
    aa_to_features = {}
    for feature_name, weighted_sums in feature_to_weighted_sums.items():
        match = pattern.match(feature_name)
        if not match:
            continue
        # -1 marks excluded positions and is filtered out right after.
        # NOTE(review): a genuine mean of exactly -1 would be dropped too.
        mean_by_heads = np.where(include_mask,
                                 weighted_sums / weight_totals, -1)
        feature_vector = mean_by_heads.flatten()
        aa_to_features[match[1]] = feature_vector[feature_vector != -1]

    aas_set = set(aa_to_features)
    print('Excluding following AAs not in feature set', aa_blosum - aas_set)
    print('Excluding following AAs not in blosum62', aas_set - aa_blosum)
    aa_list = sorted(aas_set & aa_blosum)
    n_aa = len(aa_list)

    # The diagonal stays NaN so self-comparisons are not drawn.
    corr = np.full((n_aa, n_aa), np.nan)
    for i, aa1 in enumerate(aa_list):
        vector1 = aa_to_features[aa1]
        for j, aa2 in enumerate(aa_list):
            if i != j:
                corr[i, j], _ = pearsonr(vector1, aa_to_features[aa2])

    cmap = 'Blues'
    ax = sns.heatmap(corr, cmap=cmap, vmin=-0.5)
    ax.set_xticklabels(aa_list)
    ax.set_yticklabels(aa_list)
    plt.savefig(report_dir / f'aa_corr_to.{filetype}', format=filetype)
    plt.close()

    blosum = np.full((n_aa, n_aa), np.nan)
    for i, aa1 in enumerate(aa_list):
        for j, aa2 in enumerate(aa_list):
            if i == j:
                continue
            # blosum62 may hold the pair under either key order.
            pair_score = blosum62.get((aa1, aa2))
            if pair_score is None:
                pair_score = blosum62.get((aa2, aa1))
            blosum[i, j] = pair_score

    ax = sns.heatmap(blosum, cmap=cmap, vmin=-4, vmax=4)
    ax.set_xticklabels(aa_list)
    ax.set_yticklabels(aa_list)
    plt.savefig(report_dir / f'blosum62.{filetype}', format=filetype)
    plt.close()

    # Compare the strictly lower triangles (each unordered pair once).
    lower_cells = [(i, j) for i in range(n_aa) for j in range(i)]
    corr_scores = [corr[i, j] for i, j in lower_cells]
    blos_scores = [blosum[i, j] for i, j in lower_cells]
    print('Pearson Correlation between feature corr and blosum',
          pearsonr(corr_scores, blos_scores)[0])
Ejemplo n.º 13
0
 def match_callback(c1, c2):
     """Score the pairing of symbols ``c1`` and ``c2``.

     Looks the pair up in ``blosum62`` under ``(c1, c2)`` and, if that key
     is missing, under ``(c2, c1)`` — the table appears to keep only one
     key order per pair, so checking a single order would wrongly fall
     through to the default.  Pairs absent in both orders score 1 when the
     symbols are equal and -4 otherwise.
     """
     for key in ((c1, c2), (c2, c1)):
         found = blosum62.get(key)
         if found is not None:
             return found
     return 1 if c1 == c2 else -4
Ejemplo n.º 14
0
def score_function(x, y):
    """Return the ``blosum62`` score for ``(x, y)``, or for ``(y, x)`` if absent.

    The result is ``None`` when the pair is stored under neither key order.
    """
    swapped_score = blosum62.get((y, x))
    return blosum62.get((x, y), swapped_score)
Ejemplo n.º 15
0
def score_function(x, y):
    """Score one alignment column made of the symbols ``x`` and ``y``.

    A column containing the gap symbol ``'-'`` scores -5.  Otherwise the
    ``blosum62`` entry for ``(x, y)`` is returned, falling back to the entry
    for ``(y, x)``.
    """
    if x != '-' and y != '-':
        swapped_score = blosum62.get((y, x))
        return blosum62.get((x, y), swapped_score)
    return -5
Ejemplo n.º 16
0
import sys
from itertools import product
from Bio.SubsMat.MatrixInfo import blosum62


# Script entry point: reads two sequences from stdin (one per line) and fills
# an affine-gap alignment table scored with blosum62.
# NOTE(review): the block is cut off after the last `elif` in this excerpt.
if __name__ == '__main__':
    # Exactly two newline-separated lines are expected; anything else makes
    # the unpacking below raise ValueError.
    s, t = sys.stdin.read().strip().split('\n')
    sl, tl = len(s), len(t)
    # m[(i, j)] -> (best score of s[:i] vs t[:j], predecessor cell)
    m = {(0, 0): (0, None)}
    # f: diagonal (substitution) scores; g / h: scores of alignments ending
    # in a gap entered from the cell above / from the cell to the left.
    f = {}
    g = {}
    h = {}
    # Boundary cells: a leading gap of length i scores -11 - i.
    # dict.update accepts an iterable of (key, value) pairs, here a set.
    m.update({((i, 0), (-11 - i, (i-1, 0))) for i in range(1, sl+1)})
    m.update({((0, i), (-11 - i, (0, i-1))) for i in range(1, tl+1)})
    for i, j in product(range(1, sl+1), range(1, tl+1)):
        # blosum62 is looked up in both key orders.
        cost = blosum62.get((s[i-1], t[j-1]))
        if cost == None:
            cost = blosum62.get((t[j-1], s[i-1]))
        f[(i, j)] = m[(i-1, j-1)][0] + cost
        # Extending an already open gap costs 1 ...
        gg = g.get((i-1, j))
        if gg != None:
            gg -= 1
        hh = h.get((i, j-1))
        if hh != None:
            hh -= 1
        # ... while opening a new one costs 12.
        # NOTE(review): gg / hh may still be None here; max(int, None) only
        # works on Python 2 and raises TypeError on Python 3.
        g[(i, j)] = max(m[(i-1, j)][0] - 12, gg)
        h[(i, j)] = max(m[(i, j-1)][0] - 12, hh)
        v = max(f[(i, j)], g[(i, j)], h[(i, j)])
        # Record the best score with the cell it came from; the diagonal
        # move wins ties.
        if v == f[(i, j)]:
            m[(i, j)] = (v, (i-1, j-1))
        elif v == g[(i, j)]:
Ejemplo n.º 17
0
def blosum62_score(x, y):
    """Return the ``blosum62`` score for symbols ``x`` and ``y``.

    The key ``(x, y)`` is tried first and ``(y, x)`` second; ``None`` is
    returned when neither key exists.
    """
    cost = blosum62.get((x, y))
    if cost is None:
        cost = blosum62.get((y, x))
    return cost