Esempio n. 1
0
>Rosalind_9712
GTGCCGCCTCAGACTTACAGGCACTGCCCGCGGCTCATCAGAGGGGTTGTTGCTAACGAT
AGAAGAATTACCTGGCTTGCCACGCGCCTGAGAACGGAAACCATTATATGCTGAGCACCC
GAGCTGTAAGGTTTCGCACTGCCAAACGCGAGGCGGCAAGGGAGGGTCTGTATCAGGGAC
AGAATAATCAAGGCGATCCGTCAAGACCGCCGCGTGAAGCGAGATCACTTCTGTTAGGTC
GAGGTCGTAACGCCTGGCTATGCTTATCGAGCCAAAAGCATCTTATAAGCAAGACACCTC
GTAATTCAAGCGCATGTCACTGCTCAATCGTGAGTTCATCTGTGAGAACGTTTTCGCCAA
CAGTGACTCTGAGGCCTAACGCCTCAATAAGTGAGAAGGTGACCCCGATACGACCCACCG
AAGTTAACATCAATAAGTATGTATTCATGAACACAAACTTCTCCTTGGTACGGCCATAGG
TACAATGGAAAGTAGTGATATTGGGATGGTGACCTCGGCGCCTACCCCTAGCCAGCCGGT
CATAAATTTTACAACCGCGCTACAGAGCCAGCGCTTGGGTGCTTCCCGTGCCTTTCAATT
ACGCGATTCATCTGGTCATATGGACGTATATTGGAGTTATGGGCAATATAGTTGTTCGCT
CTAGCCCGAAGATAAACACCACTCTTAAGTCATCGCAAACTACCTGACAGGGAGACCTTC
CCTCCACATCAGTCTAATGCATCGCAGCAACACAACCCGCCCGCGCTTATCTGGGAAATG
CTCGAAATGGGGGCATCTTTGAGCTC
>Rosalind_6280
AAGTAGATTACCCGTTCT
'''
    seqs = read_fasta(data)
    ref = str(seqs[0].sequence)
    substring = seqs[1].sequence
    indexes = list()
    i = 0
    for char in substring:
        i = ref.find(char, i) +1
        if (i >= 0):
            indexes.append(i)

    for x in indexes:
        print x,
Esempio n. 2
0
import numpy as np
from fasta.Fasta import read_fasta

# hetland.org/coding/python/levenshtein.py
def levenshtein(a, b):
    """Return the Levenshtein (edit) distance between sequences a and b.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Only two rows of the dynamic-programming table are kept at any time, and
    the rows are sized by the shorter input.
    """
    # Lay the shorter sequence along the row; on equal lengths keep a there.
    if len(a) <= len(b):
        shorter, longer = a, b
    else:
        shorter, longer = b, a
    width = len(shorter)

    # Row 0: distance from the empty prefix to each prefix of `shorter`.
    row = list(range(width + 1))
    for row_no, long_item in enumerate(longer, 1):
        above = row
        # Column 0: distance from a prefix of `longer` to the empty prefix.
        row = [row_no]
        for col, short_item in enumerate(shorter, 1):
            mismatch = 1 if short_item != long_item else 0
            row.append(min(above[col] + 1,          # step down from the row above
                           row[col - 1] + 1,        # step right along this row
                           above[col - 1] + mismatch))  # diagonal: match/substitute

    return row[width]

if __name__ == '__main__':
    # Read the FASTA dataset inside a context manager so the file handle is
    # closed deterministically instead of being left to garbage collection.
    with open("rosalind_edit_1_dataset.txt", 'r') as dataset:
        data = dataset.read()
    fasta = read_fasta(data)
    # Report the edit distance between the first two records. A single
    # parenthesised argument prints identically under Python 2 and Python 3.
    print(levenshtein(fasta[0].sequence, fasta[1].sequence))
    #print levenshtein('PLEASANTLY', 'MEANLY')
Esempio n. 3
0
P19835_BAL_HUMAN
O13188
P02760_HC_HUMAN
Q4FZD7
Q00001_RHGA_ASPAC
Q47A87
Q8R1Y2
A6NM15
P22457_FA7_BOVIN
P03415_VME1_CVMA5
P11171_41_HUMAN
'''
    for x in filter(None, data.split('\n')):
        url = 'http://www.uniprot.org/uniprot/' + x + '.fasta'
        fasta_content = urlopen(url).read()
        seq = read_fasta(fasta_content)
        indexes = list()
        s = seq[0].sequence
        matches = filter(None, list(re.finditer(le_regex_1, s)))
        if (len(matches) > 0):
            for m in matches:
                if m not in indexes:
                    indexes.append( m.start()+1)

        matches = filter(None, list(re.finditer(le_regex_2, s)))
        if (len(matches) > 0):
            for m in matches:
                if m not in indexes:
                    indexes.append( m.start()+1)

        if (len(indexes) > 0):
Esempio n. 4
0
def stacksize(since=0.0):
	'''Return the stack size in bytes, reduced by ``since``.

	The figure comes from ``_VmB('VmStk:')``; ``since`` (default 0.0) is
	subtracted from it before returning.
	'''
	current = _VmB('VmStk:')
	return current - since

if __name__ == '__main__':
	start_time = time.time()
	data = '''
>Rosalind_92
AUGCUUC
'''

	# Memory reading taken before the computation starts.
	m0 = memory()

	# the sequence: first record parsed from the FASTA text above
	sequence = read_fasta(data)[0].sequence

	# For each base pair (A/U and C/G) take the smaller and larger count.
	au_low, au_high = sorted(sequence.count(base) for base in 'AU')
	cg_low, cg_high = sorted(sequence.count(base) for base in 'CG')

	# Multiply the falling factorials high * (high - 1) * ... with `low`
	# factors for each pair.
	matchings = 1
	for taken in range(au_low):
		matchings *= au_high - taken
	for taken in range(cg_low):
		matchings *= cg_high - taken

	print(matchings)

	# Second memory reading, taken relative to the first.
	m1 = memory(m0)

	print(m1)