def main():
    '''
    Run the error-correction pipeline on the Rosalind CORR dataset.

    Loads the strings from 'problem_datasets/rosalind_corr.txt', extends
    that collection with rev_comp() of every loaded string, hands the
    combined collection to error_correct(), and writes each result to
    'output/rosalind_corr_out.txt' as one line whose elements are joined
    by '->'.
    '''
    reads = parse_fasta('problem_datasets/rosalind_corr.txt')
    # The comprehension is fully built before the in-place extension, so
    # only the originally loaded strings get a counterpart appended.
    counterparts = [rev_comp(read) for read in reads]
    reads += counterparts

    corrections = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as outfile:
        outfile.writelines('->'.join(entry) + '\n' for entry in corrections)
def count_apperances(string_list):
    '''
    Tally DNA strings from a list into a dict of string -> count.

    The first time a string is met while neither it nor its rev_comp()
    counterpart has an entry, both are entered with a count of 1. After
    that, each further occurrence of a string that already has an entry
    adds 1 to that string's own entry.

    NOTE(review): because a string and its counterpart are always entered
    together, the "counterpart present but string absent" branch looks
    unreachable as long as rev_comp() is its own inverse, so occurrences
    of the counterpart are tallied under the counterpart's own key rather
    than being added to the original - confirm this is what callers expect.
    '''
    tally = {}
    for read in string_list:
        if read in tally:
            tally[read] += 1
            continue

        partner = rev_comp(read)
        if partner in tally:
            tally[partner] += 1
            continue

        # Neither orientation has been seen yet: seed both entries.
        tally[read] = 1
        tally[partner] = 1
    return tally
def locate_sites(f_dna):
    '''
    Yield (position, length) for every window of f_dna that is identical
    to the mirrored window of rev_comp(f_dna).

    Window lengths tried are the even values 4, 6, 8, 10 and 12. The
    yielded position is 1-based. Results are produced length by length
    (shortest first) and, within one length, by increasing position.

    Only windows that fit completely inside f_dna are examined. Letting
    the start position run to the very end of the string produced
    truncated windows whose mirrored slice had a negative start index;
    for inputs shorter than the window length the two truncated slices
    could compare equal and report a site longer than the input itself.
    '''
    r_dna = rev_comp(f_dna)
    r_len = len(r_dna)
    for size in range(4, 13, 2):
        # range() is empty when size exceeds len(f_dna), so inputs that
        # are too short for this window length contribute nothing.
        for start in range(len(f_dna) - size + 1):
            forward = f_dna[start:start + size]
            # Assumes rev_comp() returns a string of the same length as
            # its input, so this slice covers the same bases read from
            # the other end - TODO confirm against rev_comp().
            mirror_stop = r_len - start
            mirrored = r_dna[mirror_stop - size:mirror_stop]
            if forward == mirrored:
                yield (start + 1, size)
def locate_sites(f_dna):
    '''
    Generate (position, length) pairs for each window of f_dna that
    equals the corresponding window taken from rev_comp(f_dna).

    Lengths 4, 6, 8, 10 and 12 are checked in that order; for each length
    the windows are scanned left to right. Positions are 1-based.

    The scan stops at the last start position where a full-length window
    still fits. Scanning past it made the mirrored slice start at a
    negative index, and on inputs shorter than the window length the two
    clipped slices could match, yielding sites longer than the input.
    '''
    r_dna = rev_comp(f_dna)
    f_len = len(f_dna)
    r_len = len(r_dna)
    for width in range(4, 13, 2):
        last_start = f_len - width
        # A negative last_start gives an empty range: nothing to test
        # when the input is shorter than this window width.
        for offset in range(last_start + 1):
            f = f_dna[offset:offset + width]
            # Assumes rev_comp() preserves length, making this the slice
            # that mirrors f on the other strand - TODO confirm.
            r = r_dna[r_len - offset - width:r_len - offset]
            if f == r:
                yield offset + 1, width
def raw_translate(seq):
    '''
    Translate all six reading frames of seq.

    Returns a list of six strings: indices 0-2 are frames 0, 1 and 2 of
    seq itself, indices 3-5 are frames 0, 1 and 2 of
    rosalind_utils.rev_comp(seq). Each frame is read in steps of three
    characters and every slice is looked up in
    rosalind_utils.codon_table(); a slice with no entry in the table is
    rendered as '-' (presumably this includes a trailing slice of fewer
    than three characters).
    '''
    table = rosalind_utils.codon_table()
    rev = rosalind_utils.rev_comp(seq)

    peptides = [''] * 6
    for frame in range(3):
        peptides[frame] = ''.join(
            table.get(seq[pos:pos + 3], '-')
            for pos in range(frame, len(seq), 3)
        )
        peptides[frame + 3] = ''.join(
            table.get(rev[pos:pos + 3], '-')
            for pos in range(frame, len(rev), 3)
        )
    return peptides
def raw_translate(seq):
    '''
    Produce the translation of seq in each of its six reading frames.

    The result is a list of six strings. The first three come from seq
    starting at offsets 0, 1 and 2; the last three come from
    rosalind_utils.rev_comp(seq) at the same offsets. Every
    three-character step is looked up in rosalind_utils.codon_table(),
    and '-' is used whenever the table has no entry for the slice
    (presumably also for a short slice at the very end of a frame).
    '''
    table = rosalind_utils.codon_table()
    rev = rosalind_utils.rev_comp(seq)

    def read_frame(strand, offset):
        # Translate one strand starting at the given offset.
        residues = []
        for start in range(offset, len(strand), 3):
            residues.append(table.get(strand[start:start + 3], '-'))
        return ''.join(residues)

    forward = []
    reverse = []
    for offset in range(3):
        forward.append(read_frame(seq, offset))
        reverse.append(read_frame(rev, offset))
    return forward + reverse