コード例 #1
0
ファイル: 073_SGRA.py プロジェクト: BioXiao/Rosalind
def infer_longest_peptide(masses):
    '''Return the longest protein string spelled by a path in the spectrum graph.

    An edge (masses[i], masses[j]) with i < j is added whenever
    find_weight_match() reports a match for masses[j] - masses[i] (tolerance
    0.001); the edge is labelled with the value it returns.  The result is the
    label string of a longest path through that graph.

    masses: indexable, sliceable sequence of numbers.
        NOTE(review): the early `break` below only makes sense if masses is
        sorted in ascending order -- confirm against callers.

    Returns '' when fewer than two masses are given or when no pair of masses
    matches a known weight (previously this raised ValueError from max()).
    '''
    # Fewer than two masses cannot form an edge, so the longest path is empty.
    if len(masses) < 2:
        return ''

    # Loop-invariant cutoff: computed once instead of on every inner iteration.
    max_gap = max(ProteinWeightDict().values()) + 1

    # Build the graph from the given masses: (low, high) -> matched label.
    graph = dict()
    for i, low in enumerate(masses):
        for high in masses[i + 1:]:
            gap = high - low

            # Stop scanning once the gap exceeds the maximum known weight.
            if gap > max_gap:
                break

            # Keep the edge only if the gap approximately matches a known weight.
            label = find_weight_match(gap, 0.001)
            if label is not None:
                graph[low, high] = label

    # No edges means no path; avoid calling max() on an empty sequence.
    if not graph:
        return ''

    # Get the topological ordering of the graph.
    top_order = topological_ordering(graph.keys())

    # Index predecessors once so each node does not rescan every edge.
    # Iterating the graph in its own order keeps the original tie-breaking.
    predecessors = dict()
    for source, target in graph:
        predecessors.setdefault(target, []).append(source)

    # Build the longest path to each node.
    # The `+ 1` presumes each edge label is a single character.
    S = {node: '' for node in top_order}
    for node in top_order:
        for predecessor in predecessors.get(node, ()):
            if len(S[predecessor]) + 1 > len(S[node]):
                S[node] = S[predecessor] + graph[(predecessor, node)]

    # Return the longest path.
    return max(S.values(), key=len)
コード例 #2
0
def infer_longest_peptide(masses):
    '''Return the longest protein string spelled by a path in the spectrum graph.

    An edge (masses[i], masses[j]) with i < j is added whenever
    find_weight_match() reports a match for masses[j] - masses[i] (tolerance
    0.001); the edge is labelled with the value it returns.  The result is the
    label string of a longest path through that graph.

    masses: indexable, sliceable sequence of numbers.
        NOTE(review): the early `break` below only makes sense if masses is
        sorted in ascending order -- confirm against callers.

    Returns '' when fewer than two masses are given or when no pair of masses
    matches a known weight (previously this raised ValueError from max()).
    '''
    # Fewer than two masses cannot form an edge, so the longest path is empty.
    if len(masses) < 2:
        return ''

    # Loop-invariant cutoff: computed once instead of on every inner iteration.
    max_gap = max(ProteinWeightDict().values()) + 1

    # Build the graph from the given masses: (low, high) -> matched label.
    graph = dict()
    for i, low in enumerate(masses):
        for high in masses[i + 1:]:
            gap = high - low

            # Stop scanning once the gap exceeds the maximum known weight.
            if gap > max_gap:
                break

            # Keep the edge only if the gap approximately matches a known weight.
            label = find_weight_match(gap, 0.001)
            if label is not None:
                graph[low, high] = label

    # No edges means no path; avoid calling max() on an empty sequence.
    if not graph:
        return ''

    # Get the topological ordering of the graph.
    top_order = topological_ordering(graph.keys())

    # Index predecessors once so each node does not rescan every edge.
    # Iterating the graph in its own order keeps the original tie-breaking.
    predecessors = dict()
    for source, target in graph:
        predecessors.setdefault(target, []).append(source)

    # Build the longest path to each node.
    # The `+ 1` presumes each edge label is a single character.
    S = {node: '' for node in top_order}
    for node in top_order:
        for predecessor in predecessors.get(node, ()):
            if len(S[predecessor]) + 1 > len(S[node]):
                S[node] = S[predecessor] + graph[(predecessor, node)]

    # Return the longest path.
    return max(S.values(), key=len)
コード例 #3
0
ファイル: Textbook_02D.py プロジェクト: Shivi91/Rosalind-1
# Read the whitespace-separated cyclospectrum entries (kept as strings).
with open('data/textbook/rosalind_2d.txt') as input_data:
    cyclospec = input_data.read().strip().split()

# Create the protein weight dictionary.
weight = ProteinWeightDict()

# Index the weight table once by truncated integer mass, instead of rebuilding
# the list of masses for every spectrum entry and rescanning the table for
# every name lookup.  setdefault keeps the first name seen for a given mass,
# which preserves the "if multiple, only take one" rule.
name_by_mass = {}
for residue, residue_mass in weight.items():
    name_by_mass.setdefault(int(residue_mass), residue)

# Let n be the length of a given peptide, and L be the length of its cyclospectrum.  Then L = n(n-1) + 2.
# Using the quadratic formula to solve for n:  n = (sqrt(4L-7) + 1)/2
n = int((sqrt(4 * len(cyclospec) - 7) + 1) / 2)

# Find the first n protein in the peptide.
# Need to be careful: two small proteins can add to be less than a larger one, so we can't just take the first n nonzero entries.
# Fortunately, no two small proteins masses add to that of a larger protein.
# Index 0 is skipped; an IndexError here means fewer than n entries matched.
protein, i = [], 1
while len(protein) != n:
    if int(cyclospec[i]) in name_by_mass:
        protein.append(cyclospec[i])
    i += 1

# Get the name of each protein corresponding to a given weight (if multiple, only take one).
names = [name_by_mass[int(w)] for w in protein]

# The target spectrum never changes, so build its set once rather than once
# per candidate inside the filter.
cyclospec_set = set(cyclospec)

# Build the possible sequences.
# Each round keeps only the distinct candidates whose spectrum is a strict
# subset of the target, then extends the survivors (except after the last round).
# A list comprehension is used so seq is a concrete list on Python 2 and 3.
seq = append_char(names, names)
for repeat in range(1, n):
    seq = [subpeptide for subpeptide in set(seq)
           if set(spectrum(subpeptide)) < cyclospec_set]
    if repeat != n - 1:
        seq = append_char(seq, names)
コード例 #4
0
# Read the whitespace-separated cyclospectrum entries (kept as strings).
with open('data/textbook/rosalind_2d.txt') as input_data:
    cyclospec = input_data.read().strip().split()

# Create the protein weight dictionary.
weight = ProteinWeightDict()

# Index the weight table once by truncated integer mass, instead of rebuilding
# the list of masses for every spectrum entry and rescanning the table for
# every name lookup.  setdefault keeps the first name seen for a given mass,
# which preserves the "if multiple, only take one" rule.
name_by_mass = {}
for residue, residue_mass in weight.items():
    name_by_mass.setdefault(int(residue_mass), residue)

# Let n be the length of a given peptide, and L be the length of its cyclospectrum.  Then L = n(n-1) + 2.
# Using the quadratic formula to solve for n:  n = (sqrt(4L-7) + 1)/2
n = int((sqrt(4 * len(cyclospec) - 7) + 1) / 2)

# Find the first n protein in the peptide.
# Need to be careful: two small proteins can add to be less than a larger one, so we can't just take the first n nonzero entries.
# Fortunately, no two small proteins masses add to that of a larger protein.
# Index 0 is skipped; an IndexError here means fewer than n entries matched.
protein, i = [], 1
while len(protein) != n:
    if int(cyclospec[i]) in name_by_mass:
        protein.append(cyclospec[i])
    i += 1

# Get the name of each protein corresponding to a given weight (if multiple, only take one).
names = [name_by_mass[int(w)] for w in protein]

# Build the possible sequences.
seq = append_char(names, names)
for repeat in xrange(1, n):
    seq = filter(lambda subpeptide: set(spectrum(subpeptide)) < set(cyclospec),
                 set(seq))
    if repeat != n - 1: