Esempio n. 1
0
# Implement LINEARSPACEALIGNMENT to solve the Global Alignment Problem for a large dataset.
# Input: Two long (10000 amino acid) protein strings written in the single-letter amino acid alphabet.
# Output: The maximum alignment score of these strings, followed by an alignment achieving this
# maximum score. Use the BLOSUM62 scoring matrix and indel penalty sigma = 5.

# Sample Input:
# PLEASANTLY
# MEANLY

# Sample Output:
# 8
# PLEASANTLY
# -MEA--N-LY

import inout
import common

# The two protein strings to align are entries 0 and 1 of inout.infilelines.
seq_a = inout.infilelines[0].strip()
seq_b = inout.infilelines[1].strip()

# Substitution scores come from BLOSUM62.txt via the shared parser.
blosum62 = common.parse_scoring_matrix(inout.readlines('BLOSUM62.txt'))
# The problem statement's sigma = 5 is handed to the helper as -5.
gap_score = -5

# The helper's result unpacks into the score and the two aligned strings.
result = common.linear_space_alignment(blosum62, gap_score, seq_a, seq_b)
best_score, aligned_a, aligned_b = result
inout.output('{}\n{}\n{}'.format(str(best_score), aligned_a, aligned_b))
Esempio n. 2
0
# Implement LINEARSPACEALIGNMENT to solve the Global Alignment Problem for a large dataset.
# Input: Two long (10000 amino acid) protein strings written in the single-letter amino acid alphabet.
# Output: The maximum alignment score of these strings, followed by an alignment achieving this
# maximum score. Use the BLOSUM62 scoring matrix and indel penalty sigma = 5.

# Sample Input:
# PLEASANTLY
# MEANLY

# Sample Output:
# 8
# PLEASANTLY
# -MEA--N-LY

import inout
import common

# Inputs: entries 0 and 1 of inout.infilelines, stripped of surrounding whitespace.
protein_a, protein_b = inout.infilelines[0].strip(), inout.infilelines[1].strip()

# Parse the BLOSUM62 table once; the indel score is fixed at -5 (sigma = 5).
matrix_lines = inout.readlines('BLOSUM62.txt')
substitution_scores = common.parse_scoring_matrix(matrix_lines)
sigma = -5

# Run the linear-space aligner and emit score, then both aligned strings,
# one per line.
alignment = common.linear_space_alignment(
    substitution_scores, sigma, protein_a, protein_b)
total, row_a, row_b = alignment
inout.output('{}\n{}\n{}'.format(str(total), row_a, row_b))
# Input: Two protein strings written in the single-letter amino acid alphabet.
# Output: The maximum score of a local alignment of the strings, followed by a local alignment of these
# strings achieving the maximum score. Use the PAM250 scoring matrix and indel penalty sigma = 5.

# Sample Input:
# MEANLY
# PENALTY

# Sample Output:
# 15
# EANL-Y
# ENALTY

import inout
import common

# The two protein strings are entries 0 and 1 of inout.infilelines.
seq_a = inout.infilelines[0].strip()
seq_b = inout.infilelines[1].strip()

# PAM250 substitution scores; sigma = 5 is passed to the helper as -5.
pam250 = common.parse_scoring_matrix(inout.readlines('PAM250_1.txt'))
gap_score = -5

# First pass: the helper returns the best score, a backtrack matrix and the
# row/column that are fed back into the reconstruction step below.
local_result = common.scored_longest_common_subsequence_local(
    pam250, gap_score, seq_a, seq_b)
best_score, backtrack, end_row, end_col = local_result

# Second pass: rebuild the two aligned strings from the backtrack matrix.
aligned_pair = common.output_longest_common_subsequence_local(
    backtrack, seq_a, seq_b, end_row, end_col)
aligned_a, aligned_b = aligned_pair

inout.output('{}\n{}\n{}'.format(best_score, aligned_a, aligned_b))
import inout

# Build the codon lookup tables from RNA_codon_table_1.txt.
# A line holding only a codon (no second space-separated token) is recorded
# as a stop codon; otherwise the second token is the codon's amino acid.
stop_codons = []
codon_map = {}
for row in inout.readlines('RNA_codon_table_1.txt'):
	fields = row.strip().split(' ')
	if len(fields) != 1:
		codon_map[fields[0]] = fields[1]
	else:
		stop_codons.append(fields[0])

def transcribe(sequence):
	"""Translate an RNA sequence into its amino-acid string.

	Despite the name, this maps consecutive 3-base codons to amino acids
	through the module-level codon_map, reading from the start of sequence.
	Translation ends at the first codon found in stop_codons (the stop codon
	itself produces no output) or at the end of the sequence.

	A trailing fragment of one or two bases cannot form a codon and is
	ignored instead of raising KeyError. A full 3-base codon that is in
	neither table still raises KeyError.
	"""
	amino_acids = []
	# Step through whole codons only; stopping at len - 2 skips a partial tail.
	for start in range(0, len(sequence) - 2, 3):
		codon = sequence[start:start + 3]
		if codon in stop_codons:
			break
		amino_acids.append(codon_map[codon])
	# Join once at the end rather than re-building the string per codon.
	return ''.join(amino_acids)
Esempio n. 5
0
import inout

# Map each amino-acid symbol to its integer mass. Every line of
# integer_mass_table.txt must split on a single space into exactly
# "<symbol> <mass>"; anything else fails the two-name unpacking.
mass_table = {
	symbol: int(weight)
	for symbol, weight in (
		row.strip().split(' ') for row in inout.readlines('integer_mass_table.txt')
	)
}

def total_mass(peptide):
	"""Return the summed mass of every residue in peptide.

	Each element of peptide is looked up in the module-level mass_table, so
	an element missing from the table raises KeyError. An empty peptide
	has mass 0.
	"""
	return sum(mass_table[residue] for residue in peptide)

def cyclic_spectrum(peptide):
	"""Return the sorted theoretical spectrum of a cyclic peptide.

	The spectrum holds 0, the mass of the whole peptide, and the mass of
	every contiguous sub-peptide of length 1 .. len(peptide) - 1 starting at
	each position, wrapping around the end of the peptide. Residue masses
	are read from the module-level mass_table; an unknown residue raises
	KeyError.

	The empty peptide yields [0]: its only mass is zero, so the whole-peptide
	entry is not added a second time.
	"""
	size = len(peptide)
	if size == 0:
		return [0]

	# prefix[i] is the mass of the first i residues of the doubled peptide,
	# so any wrapped sub-peptide mass is a single subtraction instead of a
	# fresh summation.
	prefix = [0]
	for amino_acid in peptide + peptide:
		prefix.append(prefix[-1] + mass_table[amino_acid])

	out_spectrum = [0, prefix[size]]
	for k in range(1, size):
		for n in range(size):
			out_spectrum.append(prefix[n + k] - prefix[n])
	return sorted(out_spectrum)

def linear_spectrum(peptide):
	"""Return the sorted theoretical spectrum of a linear peptide.

	The spectrum holds 0 plus the mass of every contiguous, non-wrapping
	sub-peptide peptide[i:j] with i < j. Residue masses are read from the
	module-level mass_table; an unknown residue raises KeyError. The empty
	peptide yields [0].
	"""
	# prefix[i] is the mass of peptide[:i]; a sub-peptide mass is then the
	# difference of two prefixes, avoiding a re-summation per sub-peptide.
	prefix = [0]
	for amino_acid in peptide:
		prefix.append(prefix[-1] + mass_table[amino_acid])

	size = len(peptide)
	out_spectrum = [0]
	for i in range(size):
		for j in range(i + 1, size + 1):
			out_spectrum.append(prefix[j] - prefix[i])
	return sorted(out_spectrum)
Esempio n. 6
0
# Input: Two protein strings written in the single-letter amino acid alphabet.
# Output: The maximum score of a local alignment of the strings, followed by a local alignment of these
# strings achieving the maximum score. Use the PAM250 scoring matrix and indel penalty sigma = 5.

# Sample Input:
# MEANLY
# PENALTY

# Sample Output:
# 15
# EANL-Y
# ENALTY

import inout
import common

# Inputs: entries 0 and 1 of inout.infilelines, stripped of surrounding whitespace.
protein_a, protein_b = inout.infilelines[0].strip(), inout.infilelines[1].strip()

# Parse the PAM250 table; the indel score is fixed at -5 (sigma = 5).
matrix_lines = inout.readlines('PAM250_1.txt')
substitution_scores = common.parse_scoring_matrix(matrix_lines)
sigma = -5

# Score the local alignment; the backtrack matrix and the returned
# row/column are passed straight into the reconstruction helper.
top_score, pointers, row, col = common.scored_longest_common_subsequence_local(
    substitution_scores, sigma, protein_a, protein_b)
row_a, row_b = common.output_longest_common_subsequence_local(
    pointers, protein_a, protein_b, row, col)

# Emit the score, then the two aligned strings, one per line.
inout.output('{}\n{}\n{}'.format(top_score, row_a, row_b))
Esempio n. 7
0
# Generating Theoretical Spectrum Problem: Generate the theoretical spectrum of a cyclic peptide.
#     Input: An amino acid string Peptide.
#     Output: Cyclospectrum(Peptide).

# Sample Input:
#     LEQN

# Sample Output:
#     0 113 114 128 129 227 242 242 257 355 356 370 371 484

import inout

# The peptide to analyse is entry 0 of inout.infilelines.
peptide = inout.infilelines[0].strip()

# Map each amino-acid symbol to its integer mass; each table line must split
# on a single space into exactly two fields.
mass_table = {}
for row in inout.readlines('integer_mass_table.txt'):
    fields = row.strip().split(' ')
    symbol, weight = fields
    mass_table[symbol] = int(weight)


def total_mass(peptide):
    """Return the combined mass of all residues in peptide.

    Every element of peptide is looked up in the module-level mass_table;
    a missing element raises KeyError. An empty peptide gives 0.
    """
    masses = [mass_table[symbol] for symbol in peptide]
    return sum(masses)


spectrum = [0, total_mass(peptide)]

peptide_2 = peptide + peptide  # for easy cyclic access
for k in range(1, len(peptide)):