>Rosalind_9712 GTGCCGCCTCAGACTTACAGGCACTGCCCGCGGCTCATCAGAGGGGTTGTTGCTAACGAT AGAAGAATTACCTGGCTTGCCACGCGCCTGAGAACGGAAACCATTATATGCTGAGCACCC GAGCTGTAAGGTTTCGCACTGCCAAACGCGAGGCGGCAAGGGAGGGTCTGTATCAGGGAC AGAATAATCAAGGCGATCCGTCAAGACCGCCGCGTGAAGCGAGATCACTTCTGTTAGGTC GAGGTCGTAACGCCTGGCTATGCTTATCGAGCCAAAAGCATCTTATAAGCAAGACACCTC GTAATTCAAGCGCATGTCACTGCTCAATCGTGAGTTCATCTGTGAGAACGTTTTCGCCAA CAGTGACTCTGAGGCCTAACGCCTCAATAAGTGAGAAGGTGACCCCGATACGACCCACCG AAGTTAACATCAATAAGTATGTATTCATGAACACAAACTTCTCCTTGGTACGGCCATAGG TACAATGGAAAGTAGTGATATTGGGATGGTGACCTCGGCGCCTACCCCTAGCCAGCCGGT CATAAATTTTACAACCGCGCTACAGAGCCAGCGCTTGGGTGCTTCCCGTGCCTTTCAATT ACGCGATTCATCTGGTCATATGGACGTATATTGGAGTTATGGGCAATATAGTTGTTCGCT CTAGCCCGAAGATAAACACCACTCTTAAGTCATCGCAAACTACCTGACAGGGAGACCTTC CCTCCACATCAGTCTAATGCATCGCAGCAACACAACCCGCCCGCGCTTATCTGGGAAATG CTCGAAATGGGGGCATCTTTGAGCTC >Rosalind_6280 AAGTAGATTACCCGTTCT ''' seqs = read_fasta(data) ref = str(seqs[0].sequence) substring = seqs[1].sequence indexes = list() i = 0 for char in substring: i = ref.find(char, i) +1 if (i >= 0): indexes.append(i) for x in indexes: print x,
import numpy as np
from fasta.Fasta import read_fasta


# hetland.org/coding/python/levenshtein.py
def levenshtein(a, b):
    """Calculate the Levenshtein distance between a and b.

    Both arguments must support ``len()`` and integer indexing, and their
    items must be comparable with ``!=`` (two strings, for instance).

    Returns the minimum number of single-item insertions, deletions and
    substitutions needed to turn one sequence into the other, as an int.

    Example:

    >>> levenshtein('PLEASANTLY', 'MEANLY')
    5
    """
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        a, b = b, a
        n, m = m, n

    # Row 0 of the table: turning an empty prefix of b into a[:j] costs j.
    # Built as a real list so the row has the same type on Python 2 and 3.
    current = list(range(n + 1))
    for i in range(1, m + 1):
        # Only two rows are kept alive; column 0 of the new row is i
        # (cost of building b[:i] from an empty prefix of a).
        previous, current = current, [i] + [0] * n
        for j in range(1, n + 1):
            add, delete = previous[j] + 1, current[j - 1] + 1
            change = previous[j - 1]
            if a[j - 1] != b[i - 1]:
                change = change + 1
            current[j] = min(add, delete, change)

    return current[n]


if __name__ == '__main__':
    # Close the dataset file deterministically instead of leaking the handle.
    with open("rosalind_edit_1_dataset.txt", 'r') as handle:
        data = handle.read()
    fasta = read_fasta(data)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(levenshtein(fasta[0].sequence, fasta[1].sequence))
P19835_BAL_HUMAN O13188 P02760_HC_HUMAN Q4FZD7 Q00001_RHGA_ASPAC Q47A87 Q8R1Y2 A6NM15 P22457_FA7_BOVIN P03415_VME1_CVMA5 P11171_41_HUMAN ''' for x in filter(None, data.split('\n')): url = 'http://www.uniprot.org/uniprot/' + x + '.fasta' fasta_content = urlopen(url).read() seq = read_fasta(fasta_content) indexes = list() s = seq[0].sequence matches = filter(None, list(re.finditer(le_regex_1, s))) if (len(matches) > 0): for m in matches: if m not in indexes: indexes.append( m.start()+1) matches = filter(None, list(re.finditer(le_regex_2, s))) if (len(matches) > 0): for m in matches: if m not in indexes: indexes.append( m.start()+1) if (len(indexes) > 0):
def stacksize(since=0.0):
    """Return stack size in bytes, less ``since``.

    Looks up the ``'VmStk:'`` entry through ``_VmB`` (defined elsewhere in
    this file) and subtracts ``since``, so passing an earlier reading
    yields the difference between the two readings.
    """
    return _VmB('VmStk:') - since


def max_matchings(sequence):
    """Count the maximum matchings of A-U and C-G pairs in ``sequence``.

    Only the number of occurrences of each letter matters.  For each
    complementary pair the rarer letter (count ``k``) is matched against
    the more common one (count ``c``), which can be done in
    ``c * (c - 1) * ... * (c - k + 1)`` ways; the result is the product of
    that figure for A/U and for C/G.  A sequence with none of these
    letters gives 1.
    """
    total = 1
    for first, second in (('A', 'U'), ('C', 'G')):
        fewer, more = sorted([sequence.count(first), sequence.count(second)])
        for used in range(fewer):
            total *= more - used
    return total


if __name__ == '__main__':
    start_time = time.time()
    # FASTA needs the header and the sequence on separate lines, so the
    # sample record is laid out one item per line.
    data = '''
>Rosalind_92
AUGCUUC
'''
    m0 = memory()

    # Parse the sample and take the sequence of its first record.
    sequence = read_fasta(data)[0].sequence
    matchings = max_matchings(sequence)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(matchings)

    # Report the memory reading relative to the one taken before the work.
    m1 = memory(m0)
    print(m1)