# Implement LINEARSPACEALIGNMENT to solve the Global Alignment Problem for a large dataset.
#     Input: Two long (10000 amino acid) protein strings written in the single-letter amino acid alphabet.
#     Output: The maximum alignment score of these strings, followed by an alignment achieving this
#     maximum score. Use the BLOSUM62 scoring matrix and indel penalty sigma = 5.
# Sample Input:
#     PLEASANTLY
#     MEANLY
# Sample Output:
#     8
#     PLEASANTLY
#     -MEA--N-LY

import inout
import common

# The two protein strings are the first two lines of the input file.
str1 = inout.infilelines[0].strip()
str2 = inout.infilelines[1].strip()

# Scoring matrix is parsed from the BLOSUM62 table shipped next to the script.
blosum62_lines = inout.readlines('BLOSUM62.txt')
scoring_matrix = common.parse_scoring_matrix(blosum62_lines)

# sigma = 5 from the problem statement, handed to the aligner as a negative value.
indel_penalty = -5

alignment_result = common.linear_space_alignment(
    scoring_matrix, indel_penalty, str1, str2)
score, alignment1, alignment2 = alignment_result

# Emit the score followed by the two aligned strings, one per line.
report = '{}\n{}\n{}'.format(score, alignment1, alignment2)
inout.output(report)
# Implement LINEARSPACEALIGNMENT to solve the Global Alignment Problem for a large dataset.
#     Input: Two long (10000 amino acid) protein strings written in the single-letter amino acid alphabet.
#     Output: The maximum alignment score of these strings, followed by an alignment achieving this
#     maximum score. Use the BLOSUM62 scoring matrix and indel penalty sigma = 5.
# Sample Input:
#     PLEASANTLY
#     MEANLY
# Sample Output:
#     8
#     PLEASANTLY
#     -MEA--N-LY

import inout
import common

# First input line is one protein string, second input line is the other.
str1 = inout.infilelines[0].strip()
str2 = inout.infilelines[1].strip()

matrix_lines = inout.readlines('BLOSUM62.txt')
scoring_matrix = common.parse_scoring_matrix(matrix_lines)
indel_penalty = -5  # sigma = 5 in the problem statement, passed as a negative value

score, alignment1, alignment2 = common.linear_space_alignment(
    scoring_matrix,
    indel_penalty,
    str1,
    str2,
)

# Output layout: score, then each aligned string on its own line.
output_text = '{}\n{}\n{}'.format(score, alignment1, alignment2)
inout.output(output_text)
#     Input: Two protein strings written in the single-letter amino acid alphabet.
#     Output: The maximum score of a local alignment of the strings, followed by a local alignment of these
#     strings achieving the maximum score. Use the PAM250 scoring matrix and indel penalty sigma = 5.
# Sample Input:
#     MEANLY
#     PENALTY
# Sample Output:
#     15
#     EANL-Y
#     ENALTY

import inout
import common

# The two protein strings are the first two lines of the input file.
str1 = inout.infilelines[0].strip()
str2 = inout.infilelines[1].strip()

pam250_lines = inout.readlines('PAM250_1.txt')
scoring_matrix = common.parse_scoring_matrix(pam250_lines)

# sigma = 5 from the problem statement, handed to the aligner as a negative value.
indel_penalty = -5

# Scoring pass: returns the best score, the backtrack matrix and a (row, col)
# position — presumably the cell where the best local alignment ends; confirm
# against common.scored_longest_common_subsequence_local.
scoring_result = common.scored_longest_common_subsequence_local(
    scoring_matrix, indel_penalty, str1, str2)
longest, backtrack_matrix, best_row, best_col = scoring_result

# Backtracking pass: rebuild the two aligned strings from that position.
aligned_pair = common.output_longest_common_subsequence_local(
    backtrack_matrix, str1, str2, best_row, best_col)
aligned1, aligned2 = aligned_pair

report = '{}\n{}\n{}'.format(longest, aligned1, aligned2)
inout.output(report)
import inout

# Codon table loaded once at import time. Each line of the table file holds a
# codon, optionally followed by a space and its amino acid; a line with a
# codon only is recorded as a stop codon.
stop_codons = []
codon_map = {}
for line in inout.readlines('RNA_codon_table_1.txt'):
    tokens = line.strip().split(' ')
    codon = tokens[0]
    if len(tokens) == 1:
        stop_codons.append(codon)
    else:
        amino_acid = tokens[1]
        codon_map[codon] = amino_acid


def transcribe(sequence):
    """Return the amino acid string encoded by *sequence*.

    The sequence is read in consecutive three-character codons. Each codon is
    looked up in ``codon_map``; reading stops at the first codon listed in
    ``stop_codons`` (which is not emitted) or at the end of the sequence.

    Args:
        sequence: String of codons using the same alphabet as the codon
            table file.

    Returns:
        The concatenated amino acids, or ``''`` for an empty sequence.

    Raises:
        KeyError: If a chunk is neither a stop codon nor a key of
            ``codon_map`` (this includes a trailing chunk shorter than three
            characters).
    """
    if not sequence:
        return ''

    # Collect the pieces and join once instead of growing a string with `+`,
    # and step through by index instead of re-slicing the remaining input.
    amino_acids = []
    for start in range(0, len(sequence), 3):
        codon = sequence[start:start + 3]
        if codon in stop_codons:
            break
        amino_acids.append(codon_map[codon])
    return ''.join(amino_acids)
import inout

# Integer mass table loaded once at import time: each line of the table file
# is "<amino acid> <integer mass>".
mass_table = {}
for line in inout.readlines('integer_mass_table.txt'):
    amino_acid, mass = line.strip().split(' ')
    mass_table[amino_acid] = int(mass)


def total_mass(peptide):
    """Return the sum of the integer masses of the amino acids in *peptide*.

    Raises:
        KeyError: If an amino acid is not present in ``mass_table``.
    """
    return sum(mass_table[amino_acid] for amino_acid in peptide)


def _prefix_masses(peptide):
    """Return the list p where p[i] is the total mass of peptide[:i]."""
    prefix = [0]
    for amino_acid in peptide:
        prefix.append(prefix[-1] + mass_table[amino_acid])
    return prefix


def cyclic_spectrum(peptide):
    """Return the sorted theoretical spectrum of a cyclic peptide.

    The spectrum holds 0, the mass of the whole peptide, and the mass of every
    sub-peptide of length 1 .. len(peptide) - 1 starting at each position,
    wrapping around the end of the peptide.

    Args:
        peptide: Sequence of amino acids that are keys of ``mass_table``.

    Returns:
        A sorted list of integer masses. For an empty peptide this is
        ``[0, 0]`` (the 0 entry plus the zero total mass).

    Raises:
        KeyError: If an amino acid is not present in ``mass_table``.
    """
    length = len(peptide)
    # Prefix masses over the doubled peptide make every wrapped sub-peptide
    # mass a single subtraction instead of a fresh summation.
    prefix = _prefix_masses(peptide + peptide)
    out_spectrum = [0, prefix[length]]
    for k in range(1, length):
        for start in range(length):
            out_spectrum.append(prefix[start + k] - prefix[start])
    return sorted(out_spectrum)


def linear_spectrum(peptide):
    """Return the sorted theoretical spectrum of a linear peptide.

    The spectrum holds 0 and the mass of every contiguous sub-peptide,
    including the whole peptide.

    Args:
        peptide: Sequence of amino acids that are keys of ``mass_table``.

    Returns:
        A sorted list of integer masses; ``[0]`` for an empty peptide.

    Raises:
        KeyError: If an amino acid is not present in ``mass_table``.
    """
    length = len(peptide)
    prefix = _prefix_masses(peptide)
    out_spectrum = [0]
    for i in range(length):
        # end is exclusive, so prefix[end] - prefix[i] is the mass of peptide[i:end].
        for end in range(i + 1, length + 1):
            out_spectrum.append(prefix[end] - prefix[i])
    return sorted(out_spectrum)
#     Input: Two protein strings written in the single-letter amino acid alphabet.
#     Output: The maximum score of a local alignment of the strings, followed by a local alignment of these
#     strings achieving the maximum score. Use the PAM250 scoring matrix and indel penalty sigma = 5.
# Sample Input:
#     MEANLY
#     PENALTY
# Sample Output:
#     15
#     EANL-Y
#     ENALTY

import inout
import common

# First input line is one protein string, second input line is the other.
str1 = inout.infilelines[0].strip()
str2 = inout.infilelines[1].strip()

matrix_lines = inout.readlines('PAM250_1.txt')
scoring_matrix = common.parse_scoring_matrix(matrix_lines)
indel_penalty = -5  # sigma = 5 in the problem statement, passed as a negative value

# Scoring pass: yields the best score, the backtrack matrix and a (row, col)
# position — presumably where the best local alignment ends; verify in common.
longest, backtrack_matrix, best_row, best_col = (
    common.scored_longest_common_subsequence_local(
        scoring_matrix,
        indel_penalty,
        str1,
        str2,
    )
)

# Backtracking pass: recover the two aligned strings from that position.
aligned1, aligned2 = common.output_longest_common_subsequence_local(
    backtrack_matrix,
    str1,
    str2,
    best_row,
    best_col,
)

# Output layout: score, then each aligned string on its own line.
output_text = '{}\n{}\n{}'.format(longest, aligned1, aligned2)
inout.output(output_text)
# Generating Theoretical Spectrum Problem: Generate the theoretical spectrum of a cyclic peptide. # Input: An amino acid string Peptide. # Output: Cyclospectrum(Peptide). # Sample Input: # LEQN # Sample Output: # 0 113 114 128 129 227 242 242 257 355 356 370 371 484 import inout peptide = inout.infilelines[0].strip() mass_table = {} for line in inout.readlines('integer_mass_table.txt'): amino_acid, mass = line.strip().split(' ') mass_table[amino_acid] = int(mass) def total_mass(peptide): total = 0 for amino_acid in peptide: total = total + mass_table[amino_acid] return total spectrum = [0, total_mass(peptide)] peptide_2 = peptide + peptide # for easy cyclic access for k in range(1, len(peptide)):