def find_N_glycosylation_motif(protein):
    """Collect the positions of 'N' in *protein* that pass the motif check.

    For every position ``pos`` that ``substring_find(protein, 'N')`` reports,
    the four-element window ``protein[pos - 1:pos + 3]`` is examined.  The
    position is kept when the window is complete (four elements), its second
    and fourth elements are not 'P', and its third element is 'S' or 'T'.

    NOTE(review): the window starts one element before ``pos``, so this
    presumably relies on substring_find yielding 1-based positions (making
    the first window element the 'N' itself) -- confirm against
    substring_find.

    Returns:
        A list of the accepted positions, in the order substring_find
        produced them.
    """
    accepted = []
    for pos in substring_find(protein, 'N'):
        window = protein[pos - 1:pos + 3]
        # A short window means the candidate runs off the end of the protein.
        if len(window) != 4:
            continue
        _, second, third, fourth = window
        if second == 'P' or fourth == 'P':
            continue
        if third in ('S', 'T'):
            accepted.append(pos)
    return accepted
def adjacency_list(seqs):
    """Build the set of overlap edges between the named sequences in *seqs*.

    For every ordered pair of distinct keys ``(name, other)`` the overlap
    length is the larger of ``int(ceil(len(data) / 2))`` computed for each
    of the two sequences.  The first ``overlap`` characters of ``other``'s
    sequence are then searched for inside ``name``'s sequence with
    substring_find.

    Args:
        seqs: mapping from sequence name to sequence data.

    Returns:
        A set of ``(name, other, position)`` tuples, one per pair with at
        least one hit, where ``position`` is the last position reported by
        substring_find.
    """
    def half_length(data):
        return int(ceil(len(data) / 2))

    edges = set()
    for name, seq_data in seqs.items():
        own_half = half_length(seq_data)
        for other, adj_data in seqs.items():
            # A sequence is never paired with itself.
            if other == name:
                continue
            overlap = max(own_half, half_length(adj_data))
            hits = list(substring_find(seq_data, adj_data[:overlap]))
            if hits:
                edges.add((name, other, hits[-1]))
    return edges
def kmer_composition(dna_string):
    """Count the occurrences of each 4-mer in *dna_string*.

    Every item produced by ``lexf_order(4, 'ACGT')`` is joined into a string
    and looked up in *dna_string* with substring_find; the number of
    reported positions is recorded.

    NOTE(review): the ordering of the result follows whatever order
    lexf_order yields -- presumably lexicographic; confirm against lexf_order.

    Returns:
        A list of counts, each converted to ``str``, one per item yielded
        by lexf_order.
    """
    return [
        str(len(list(substring_find(dna_string, ''.join(letters)))))
        for letters in lexf_order(4, 'ACGT')
    ]
from prot import prepare_codon_table
from revp import read_fasta
from revc import reverse_complement
from rna import rna_transcription
from subs import substring_find

# Script: read one DNA record, translate the candidate open reading frames
# found on both strands, and print each distinct protein string once.
# NOTE(review): `os` is used below but is not imported in this fragment --
# confirm it is imported elsewhere in the file.
if __name__ == "__main__":
    with open(os.path.join('data', 'rosalind_orf.txt')) as dataset:
        seqs = read_fasta(dataset)
    codon_table = prepare_codon_table(os.path.join('data', 'codon_table'))

    proteins = []
    # Only one record is consumed; its name is not needed.
    _, dna = seqs.popitem()
    forward = rna_transcription(dna)
    backward = rna_transcription(reverse_complement(dna))

    for strand in (forward, backward):
        for offset in (0, 1, 2):
            # NOTE(review): start_pos is found in strand[offset:] but is used
            # to index strand itself -- confirm against substring_find's
            # indexing convention.
            for start_pos in substring_find(strand[offset:], 'AUG'):
                peptide = []
                for i in range(start_pos, len(strand), 3):
                    codon = strand[i:i + 3]
                    # Ignore the incomplete codon at the end of the strand.
                    if len(codon) != 3:
                        continue
                    amino = codon_table[codon]
                    if amino == 'Stop' and peptide:
                        # A stop closes the peptide being built; scanning
                        # then continues in the same frame.
                        proteins.append(''.join(peptide))
                        peptide = []
                    elif codon == 'AUG' or peptide:
                        # Start a peptide at AUG, or extend the open one.
                        peptide.append(amino)

    # set() removes the duplicates produced by overlapping scans.
    print("\n".join(set(proteins)))
from prot import prepare_codon_table
from revp import read_fasta
from revc import reverse_complement
from rna import rna_transcription
from subs import substring_find

# Script: load a DNA record, scan both strands for open reading frames and
# print every distinct translated protein string.
# NOTE(review): `os` is referenced below without an import in this fragment --
# confirm it is imported elsewhere in the file.
if __name__ == "__main__":
    dataset_path = os.path.join("data", "rosalind_orf.txt")
    with open(dataset_path) as dataset:
        seqs = read_fasta(dataset)
    codon_table = prepare_codon_table(os.path.join("data", "codon_table"))

    found = []
    # popitem() yields a (name, sequence) pair; only the sequence is used.
    dna = seqs.popitem()[1]
    strands = (
        rna_transcription(dna),
        rna_transcription(reverse_complement(dna)),
    )

    for seq in strands:
        seq_length = len(seq)
        for offset in range(3):
            # NOTE(review): positions come from a search of seq[offset:] yet
            # are applied to seq -- confirm against substring_find's
            # indexing convention.
            for start_pos in substring_find(seq[offset:], "AUG"):
                chain = []
                for index in range(start_pos, seq_length, 3):
                    codon = seq[index : index + 3]
                    if len(codon) == 3:
                        residue = codon_table[codon]
                        if residue == "Stop" and chain:
                            # Stop codon: emit the finished chain and reset.
                            found.append("".join(chain))
                            chain = []
                        elif codon == "AUG" or chain:
                            # Open a chain at AUG or keep extending it.
                            chain.append(residue)

    # Duplicates arise from overlapping scans; set() collapses them.
    print("\n".join(set(found)))