Ejemplo n.º 1
0
 def create_graphs(ext=1):
     """Group the integer lines from ``create_strings(ext=ext)`` into graphs.

     Each line is split on single spaces and every field is converted to int.
     A line with exactly one number is ignored, a line with exactly two
     numbers starts a new graph (and becomes its first entry, as a 2-tuple),
     and any longer line contributes a 3-tuple built from its first three
     numbers to the graph currently being collected.

     Returns:
         A list of graphs, each a list of tuples in input order.
     """
     graphs = []
     current = []
     for text in create_strings(ext=ext):
         fields = [int(token) for token in text.split(' ')]
         count = len(fields)
         if count == 1:
             # Single-number lines carry nothing that is stored.
             continue
         if count == 2:
             # A two-number line closes the previous graph and opens a new one.
             if current:
                 graphs.append(current)
             current = [tuple(fields)]
             continue
         current.append(tuple(fields[:3]))
     # Flush the graph still being collected when the input ends.
     if current:
         graphs.append(current)
     return graphs
Ejemplo n.º 2
0
        i, j, di, dj = moves[(i, j)]
        if di == 0:
            s1.append('-')
            t1.append(t[j])
        elif dj == 0:
            s1.append(s[i])
            t1.append('-')
        else:
            s1.append(s[i])
            t1.append(t[j])

    return score, s1[:-1][::-1], t1[:-1][::-1]


def ba5h(s, t):
    """Align *s* and *t* through ``align`` and return joined result strings.

    Both inputs are expanded into lists of their elements before being handed
    to ``align`` together with this module's ``build_matrix`` and
    ``backtrack`` callables.

    Returns:
        A tuple ``(score, aligned_s, aligned_t)`` where the two aligned
        sequences have been joined back into strings.
    """
    outcome = align(list(s),
                    list(t),
                    build_matrix=build_matrix,
                    backtrack=backtrack)
    value, row_s, row_t = outcome
    return (value, ''.join(row_s), ''.join(row_t))


if __name__ == '__main__':
    import timeit

    # The timer covers reading the input as well as the alignment itself.
    began = timeit.default_timer()
    inputs = create_strings(ext=1)
    best, upper, lower = ba5h(inputs[0], inputs[1])
    print('Score = {0}'.format(best))
    for row in (upper, lower):
        print(row)
    print('Elapsed Time = {0}'.format(timeit.default_timer() - began))
Ejemplo n.º 3
0
        while i>0 and j>0:
            index =  argmax([0,
                             distances[i-1,j]   - indel_cost,
                             distances[i,j-1]   - indel_cost,
                             distances[i-1,j-1] + score(v[i-1],w[j-1])])
            i1,j1 = [(0,j-1), (i-1,j), (i,j-1), (i-1,j-1)][index]
            v1.append(v[i1] if i1<i else '-')
            w1.append(w[j1] if j1<j else '-')
            i,j = i1,j1
    
        return sum(score(u,v) for u,v in zip(v1,w1)),v1[::-1],w1[::-1]
    
    score,u1,v1=dynamic_programming([vv for vv in v],[ww for ww in w])
    return score,''.join(u1),''.join(v1)

if __name__ == '__main__':
    from helpers import create_strings

    # Small hand-written input kept for quick manual checks:
    #   oap('CTAAGGGATTCCGGTAATTAGACAG', 'ATAGACCATATGTCAGTGACTGTGTAA')
    began = time()
    sequences = create_strings('oap', fasta=1, ext=4)
    best, upper, lower = oap(sequences[0], sequences[1])

    # Report on the console: score, both aligned strings, then elapsed time.
    print('{0}'.format(best))
    print(upper)
    print(lower)
    print(time() - began)

    # Write the same three results to oap.txt, one per line.
    with open('oap.txt', 'w') as out:
        for item in (best, upper, lower):
            out.write('{0}\n'.format(item))
    
Ejemplo n.º 4
0
                match_t = True

        if match_s and match_t:
            if gap > 0:
                score -= (sigma + (gap - 1) * epsilon)
            gap = 0
            if (s[i], t[i]) in replace_score:
                score += replace_score[(s[i], t[i])]
            else:
                score += replace_score[(t[i], s[i])]

    if gap > 0:
        score -= (sigma + (gap - 1) * epsilon)

    return score


if __name__ == '__main__':
    from helpers import create_strings

    # Quick manual check: gaff('PRTEINS', 'PRTWPSEIN')
    strings = create_strings(fasta=True, ext=3)
    score, s, t = gaff(strings[0], strings[1])

    # Re-score the returned alignment once and reuse the value for both the
    # console report and the output file, instead of calling get_score twice
    # with identical arguments.
    checked_score = get_score(s, t)

    print(score, checked_score)
    print(s)
    print(t)
    # The file records the re-computed score, followed by the two aligned
    # strings, one item per line.
    with open('gaff.txt', 'w') as o:
        o.write('{0}\n'.format(checked_score))
        o.write('{0}\n'.format(s))
        o.write('{0}\n'.format(t))
Ejemplo n.º 5
0
from graphs import gs

if __name__ == '__main__':

    from helpers import create_strings

    # Hand-written sample, kept for reference:
    #   graphs = [
    #       [(3, 2), (3, 2), (2, 1)],
    #       [(3, 2), (3, 2), (1, 2)],
    #   ]

    # Blank lines separate graphs; every non-blank line holding more than one
    # integer is stored as one entry of the graph being collected.
    graphs = []
    pending = []
    for text in create_strings(ext=2):
        if not text:
            if pending:
                graphs.append(pending)
                pending = []
            continue
        numbers = [int(token) for token in text.split(" ")]
        if len(numbers) > 1:
            pending.append(numbers)
    # Flush the final graph when the input does not end with a blank line.
    if pending:
        graphs.append(pending)

    # One result per graph, space separated on a single line.
    print(' '.join(str(gs(graph)) for graph in graphs))
Ejemplo n.º 6
0
from graphs import sc

if __name__ == '__main__':
    from helpers import create_strings

    # Hand-written sample, kept for reference:
    #   graphs = [
    #       2,
    #       [(3, 2), (3, 2), (2, 1)],
    #       [(3, 2), (3, 2), (1, 2)],
    #   ]

    # Blank lines separate graphs; every non-blank line holding more than one
    # integer is stored as one entry of the graph being collected.
    graphs = []
    pending = []
    for text in create_strings(ext=1):
        if not text:
            if pending:
                graphs.append(pending)
                pending = []
            continue
        numbers = [int(token) for token in text.split(" ")]
        if len(numbers) > 1:
            pending.append(numbers)
    # Flush the final graph when the input does not end with a blank line.
    if pending:
        graphs.append(pending)

    # One result per graph, space separated on a single line.
    print(' '.join(str(sc(graph)) for graph in graphs))
Ejemplo n.º 7
0
# BA5M.py Find a Highest-Scoring Multiple Sequence Alignment 

from align import FindHighestScoringMultipleSequenceAlignment




if __name__ == '__main__':
    from helpers import create_strings

    # Two hard-coded inputs are aligned first, in this order.
    samples = [
        ('ATATCCG', 'TCCGA', 'ATGTACTG'),
        ('TGTTTAAAAATGTCCGCAACCATTTC',
         'GATATAAAACAGGGATAACTGCAATGG',
         'CCTGCTACTTTATGCCGTCTCCATATGCG'),
    ]
    for first, second, third in samples:
        total, row_u, row_v, row_w = FindHighestScoringMultipleSequenceAlignment(
            first, second, third)
        for item in (total, row_u, row_v, row_w):
            print(item)

    # Then the first three strings returned by create_strings are aligned.
    ss = create_strings(ext=3)
    total, row_u, row_v, row_w = FindHighestScoringMultipleSequenceAlignment(
        ss[0], ss[1], ss[2])
    for item in (total, row_u, row_v, row_w):
        print(item)
 
Ejemplo n.º 8
0
                        action='store_true',
                        help='process sample dataset')
    parser.add_argument('--extra',
                        default=False,
                        action='store_true',
                        help='process extra dataset')
    parser.add_argument('--rosalind',
                        default=False,
                        action='store_true',
                        help='process Rosalind dataset')
    args = parser.parse_args()
    if args.sample:
        print(NumberPerfectMatchings('AGCUAGUCAU'))

    if args.extra:
        Input, Expected = read_strings('data/....txt', init=0)
        ...

    if args.rosalind:

        Input = create_strings(path='./data', fasta=True)
        Result = NumberPerfectMatchings(Input[0])
        print(Result)
        with open(f'{os.path.basename(__file__).split(".")[0]}.txt', 'w') as f:
            f.write(f'{Result}\n')

    elapsed = time.time() - start
    minutes = int(elapsed / 60)
    seconds = elapsed - 60 * minutes
    print(f'Elapsed Time {minutes} m {seconds:.2f} s')
Ejemplo n.º 9
0
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>

# GCON Global Alignment with Constant Gap Penalty

from helpers import create_strings
from align import san_kai


def gcon(s, t):
    """Align *s* and *t* via ``san_kai`` with ``sigma=5`` and ``epsilon=0``.

    Both inputs are expanded into lists of their elements before the call.

    Returns:
        A tuple ``(score, aligned_s, aligned_t)`` where the two aligned
        sequences returned by ``san_kai`` have been joined into strings.
    """
    outcome = san_kai(list(s), list(t), sigma=5, epsilon=0)
    value, row_s, row_t = outcome
    return value, ''.join(row_s), ''.join(row_t)


if __name__ == '__main__':
    # NOTE(review): blosum62 is not referenced anywhere in this block; the
    # import is kept unchanged in case it is still wanted - confirm.
    from Bio.SubsMat.MatrixInfo import blosum62
    import sys

    # Command line forms:
    #   --sample      align the built-in example strings
    #   --test        align the first two strings from create_strings
    #   <s> <t>       align the two strings given on the command line
    usage = 'usage: {0} --sample | --test | <s> <t>'.format(sys.argv[0])
    args = sys.argv[1:]

    # Indexing sys.argv directly raised IndexError when the script was run
    # with no argument, or with only one positional argument; exit with a
    # usage message instead.
    if not args:
        sys.exit(usage)

    if args[0] == '--sample':
        score, _, _ = gcon('PLEASANTLY', 'MEANLY')
    elif args[0] == '--test':
        strings = create_strings('gcon', ext=3, fasta=True)
        score, _, _ = gcon(strings[0], strings[1])
    elif len(args) >= 2:
        score, _, _ = gcon(args[0], args[1])
    else:
        sys.exit(usage)
    print(score)
Ejemplo n.º 10
0
#    Copyright (C) 2019 Greenweaves Software Limited
#
#    This is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This software is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>

# BA5I Find a Highest-Scoring Overlap Alignment of Two Strings

from align import create_distance_matrix, overlap_assignment

if __name__ == '__main__':
    from helpers import create_strings

    # Align the first two input strings and print the score followed by the
    # two aligned strings, one item per line.
    inputs = create_strings('ba5i', ext=1)
    best, upper, lower = overlap_assignment(inputs[0], inputs[1])
    print('{0}'.format(best))
    for row in (upper, lower):
        print(row)
Ejemplo n.º 11
0
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>
#
#    prsm Matching a Spectrum to a Protein

from spectrum import prsm

if __name__ == '__main__':
    from helpers import create_strings

    # Input layout: the first line is a count n, the next n lines are kept
    # as strings, and every remaining line is parsed as a float.
    s = []
    R = []
    for position, ll in enumerate(create_strings(ext=3)):
        if position == 0:
            n = int(ll)
            continue
        if position <= n:
            s.append(ll)
        else:
            R.append(float(ll))

    m, s_max = prsm(s, R)

    print(m)
    print(s_max)
Ejemplo n.º 12
0
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>

# HDAG Hamiltonian Path in DAG 
#


if __name__=='__main__':
    from helpers import create_strings
       
    # Parse the input lines into a list of graphs, each graph being a list of
    # (a, b) tuples taken from the two-number lines.
    # NOTE(review): nothing visible here consumes `gs`; the rest of the script
    # may lie beyond this excerpt - confirm.
    gs = []
    g  = []
    for line in create_strings(ext=1):
        if len(line)==0:
            # A blank line closes the graph being collected, if it has entries.
            if len(g)>0:
                gs.append(g)
            g=[]
            continue
        numbers = [int(s) for s in line.split(' ')]
        if len(numbers)==1:
            # Single-number lines contribute nothing and are skipped.
            continue
        elif len(numbers)==2:
            
            g.append((numbers[0],numbers[1]))
        # Lines with three or more numbers match neither branch and are
        # silently ignored.

    # Flush the last graph when the input does not end with a blank line.
    if len(g)>0:
        gs.append(g)