def find_best_chain(current_chain, chars, max_lookahead, ch_score):
    """Recursively try every legal fold, at most ``max_lookahead`` steps deep.

    A branch is scored with ``get_score_efficient`` once only one character
    is left or the global ``current_lookahead`` equals ``max_lookahead``.
    When that score is lower than the global ``best_score``, the score and a
    deep copy of the chain are stored in ``best_score`` / ``best_chain``.
    Every amino appended during the search is removed again before returning.

    Args:
        current_chain: chain object; its ``chain_list`` is extended and
            restored in place.
        chars: remaining amino characters. On every call except the one made
            at lookahead 0 the first character is dropped before use.
        max_lookahead: depth at which a branch is scored instead of extended.
        ch_score: forwarded unchanged to ``get_score_efficient``.

    Returns:
        None. Results are reported through the module globals.
    """
    global current_lookahead
    global best_score
    global best_chain

    # Only the outermost call (lookahead 0) keeps its first character; deeper
    # calls drop the character their caller has just placed.
    if current_lookahead != 0:
        chars = chars[1:]

    stop_here = len(chars) == 1 or current_lookahead == max_lookahead
    if stop_here:
        # Close the branch with a fold of 0 so it can be scored.
        tail_position = current_chain.chain_list[-1].get_fold_coordinates()
        current_chain.chain_list.append(Amino(chars[0], 0, tail_position))

        # The matrix is only needed as input for the score.
        matrix, xy_offset = get_matrix_efficient(current_chain.chain_list)
        score = get_score_efficient(
            current_chain.chain_list, matrix, xy_offset, ch_score)

        # Lower is better: remember the chain when it beats the best so far.
        if score < best_score:
            best_score = score
            best_chain = copy.deepcopy(current_chain.chain_list)

        # Take the closing amino off again so the caller sees its own chain.
        del current_chain.chain_list[-1]
        return None

    legal_moves = get_legal_moves_nomirror(
        current_chain.chain_list[-1].get_fold_coordinates(), current_chain)

    # No legal move means the protein got "stuck"; give up on this branch.
    if not legal_moves:
        return None

    for move in legal_moves:
        previous_amino = current_chain.chain_list[-1]

        # Go one level deeper: count the step and place the next amino.
        current_lookahead += 1
        current_chain.chain_list.append(
            Amino(chars[0], move, previous_amino.get_fold_coordinates()))
        current_chain.update_mirror_status()

        find_best_chain(current_chain, chars, max_lookahead, ch_score)

        # Backtrack: undo the mirror status, the depth and the amino.
        current_chain.update_mirror_status_reverse()
        current_lookahead -= 1
        del current_chain.chain_list[-1]
def find_best_chain(current_chain, chars, ch_score):
    """Exhaustively try every legal fold for the remaining characters.

    The first character of ``chars`` is dropped on entry. When exactly one
    character is left, it is appended with fold 0 and the chain is scored
    with ``get_score_efficient``. A score lower than the global
    ``best_score`` replaces ``best_score`` and a deep copy of the chain is
    stored in the global ``best_chain``. Every amino appended during the
    search is removed again before returning.

    Args:
        current_chain: chain object; its ``chain_list`` is extended and
            restored in place.
        chars: remaining amino characters, including the one that the caller
            has already placed at index 0.
        ch_score: forwarded unchanged to ``get_score_efficient``.

    Returns:
        None. Results are reported through the module globals.
    """
    global best_score
    global best_chain

    # Index 0 was already handled by the caller.
    chars = chars[1:]

    if len(chars) == 1:
        # Final amino of the chain: place it with fold 0 and score the result.
        tail_position = current_chain.chain_list[-1].get_fold_coordinates()
        current_chain.chain_list.append(Amino(chars[0], 0, tail_position))

        matrix, xy_offset = get_matrix_efficient(current_chain.chain_list)
        score = get_score_efficient(
            current_chain.chain_list, matrix, xy_offset, ch_score)

        # Lower is better: remember the chain when it beats the best so far.
        if score < best_score:
            print("New best score: " + str(score))
            best_score = score
            best_chain = copy.deepcopy(current_chain.chain_list)

        # Remove the final amino so the caller's chain is unchanged.
        del current_chain.chain_list[-1]
        return None

    legal_moves = get_legal_moves_nomirror(
        current_chain.chain_list[-1].get_fold_coordinates(), current_chain)

    # No legal move means the protein got "stuck"; give up on this branch.
    if not legal_moves:
        return None

    for move in legal_moves:
        previous_amino = current_chain.chain_list[-1]
        current_chain.chain_list.append(
            Amino(chars[0], move, previous_amino.get_fold_coordinates()))
        current_chain.update_mirror_status()

        find_best_chain(current_chain, chars, ch_score)

        # Backtrack so the next move starts from the same chain.
        current_chain.update_mirror_status_reverse()
        del current_chain.chain_list[-1]
def depth_search(protein, ch_score):
    """Fold ``protein`` by handing the search over to ``fold_selector``.

    For every amino still to be placed (``protein.char_counter`` onwards)
    the last amino gets fold 0; otherwise ``fold_selector`` is asked for the
    ideal chain. When it reports one, the global ``best_chain`` is passed
    through ``get_matrix`` and the result replaces ``protein.matrix`` and
    ``protein.chain.chain_list``.

    Args:
        protein: object providing ``amino_string``, ``char_counter``,
            ``chain.chain_list`` and ``matrix``.
        ch_score: forwarded unchanged to ``fold_selector``.

    Raises:
        RuntimeError: if ``fold_selector`` returns a falsy value, because no
            fold is then available for the amino. (The original code failed
            at this point with an unbound ``fold`` variable.)
    """
    # Index 0 is skipped by starting at protein.char_counter.
    while protein.char_counter < len(protein.amino_string):
        char = protein.amino_string[protein.char_counter]

        # Position the previous amino folded to; -1 is the last list element.
        amino_xy = protein.chain.chain_list[-1].get_fold_coordinates()

        if protein.char_counter + 1 == len(protein.amino_string):
            # Last amino always has a fold of 0.
            fold = 0
        else:
            illegal_folds = None
            ideal_chain = fold_selector(amino_xy, char, protein.chain,
                                        illegal_folds, protein.amino_string,
                                        ch_score)

            # The full chain was found: take it over and stop.
            if ideal_chain:
                protein.matrix, protein.chain.chain_list = get_matrix(
                    best_chain)
                break

            raise RuntimeError(
                "fold_selector did not return a complete chain")

        protein.chain.chain_list.append(Amino(char, fold, amino_xy))

        # Advance the counter the loop condition tests; the original
        # incremented an unrelated local and therefore never terminated.
        protein.char_counter += 1
def depth_search_lookahead(protein, ch_score, max_lookahead):
    """Fold ``protein`` one amino at a time using a bounded lookahead.

    For each remaining character ``fold_selector`` is asked for the next
    fold. When it reports that the whole chain has been processed, the
    global ``best_chain`` is passed through ``get_matrix`` and the loop
    stops. After the loop the chain is passed through ``get_matrix`` once
    more and the globals ``best_score`` / ``best_chain`` are reset to
    ``1`` / ``[]``.

    Args:
        protein: object providing ``amino_string``, ``char_counter``,
            ``chain`` and ``matrix``.
        ch_score: forwarded unchanged to ``fold_selector``.
        max_lookahead: forwarded unchanged to ``fold_selector``.
    """
    global best_chain
    global best_score

    chars = protein.amino_string
    chain_length_goal = len(chars)

    # The first amino is built by the protein class, so skip its character.
    chars = chars[1:]

    # Stop when no characters remain. The original looped on ``while True``
    # and raised IndexError on ``chars[0]`` once the string was used up.
    while chars:
        char = chars[0]

        # Position the previous amino folded to; -1 is the last list element.
        amino_xy = protein.chain.chain_list[-1].get_fold_coordinates()

        if protein.char_counter + 1 == len(protein.amino_string):
            # Last amino always has a fold of 0.
            # NOTE(review): protein.char_counter is never advanced in this
            # function, so this only triggers for the value it had on entry.
            fold = 0
        else:
            # ideal_chain is truthy when the full chain has been processed;
            # otherwise ``fold`` is the next fold to apply.
            ideal_chain, fold = fold_selector(protein.chain, chars,
                                              max_lookahead,
                                              chain_length_goal, ch_score)

            if ideal_chain:
                protein.matrix, protein.chain.chain_list = get_matrix(
                    best_chain)
                break

        protein.chain.chain_list.append(Amino(char, fold, amino_xy))
        protein.chain.update_mirror_status()

        print("Char " + str(len(protein.chain.chain_list)) + "/"
              + str(len(protein.amino_string)) + ". Beste score: "
              + str(best_score), file=sys.stdout)
        print("")

        # This character has been processed.
        chars = chars[1:]

        # Start the next lookahead round from a clean slate.
        best_score = 1
        best_chain = []

    # Update matrix and chain; the coordinate offset is applied here.
    protein.matrix, protein.chain.chain_list = get_matrix(
        protein.chain.chain_list)

    best_score = 1
    best_chain = []
def __init__(self, amino_string, mode):
    """Create a protein with its first amino already placed.

    Args:
        amino_string: sequence of amino types; case-insensitive, only the
            letters H, P and C are accepted.
        mode: ``"2d"``/``"2D"`` or ``"3d"``/``"3D"``.

    Raises:
        Exception: if ``mode`` is not one of the accepted values, if
            ``amino_string`` is empty, or if it contains any character other
            than H, P or C.
    """
    if mode in ("2d", "2D"):
        self.mode_3d = False
    elif mode in ("3d", "3D"):
        self.mode_3d = True
    else:
        raise Exception("Dimension mode not valid")

    # The list which contains the ordered and connected aminos.
    self.chain = Chain([])

    # The string of the protein, made case-insensitive.
    self.amino_string = amino_string.upper()

    # An empty string has no first amino to place; fail with a clear message
    # instead of an IndexError further down.
    if not self.amino_string:
        raise Exception("Amino string is empty")

    # Reject everything that is not H, P or C. The original only rejected
    # other uppercase letters, so digits, spaces and punctuation slipped
    # through.
    allowed_chars = {"H", "P", "C"}
    if any(char not in allowed_chars for char in self.amino_string):
        raise Exception("Amino string contains illegal chars")

    # Add the first amino to the chain; its fold is hard-coded as 2 ("up").
    origin = [0, 0, 0] if self.mode_3d else [0, 0]
    self.chain.chain_list.append(Amino(self.amino_string[0], 2, origin))

    # Used to store the FINISHED matrix.
    self.matrix = []

    # Some functions use this to determine the char being "calculated".
    self.char_counter = 1
def initializeProtein(self):
    """
    Resets ``aminoList`` and ``occupied`` and lays the protein out in a
    straight line: amino ``n`` gets a coordinate that is all zeros except
    for the value ``n`` at index 1.
    """
    self.aminoList = []
    self.occupied = []

    for position in range(self.proteinLength):
        amino = Amino(position, self.proteinString[position])
        self.aminoList.append(amino)

        # The number of axes comes from the first character of self.plane.
        coordinate = [0] * int(self.plane[0])
        coordinate[1] = position

        # The amino and the occupied list share the same coordinate list.
        amino.coordinate = coordinate
        self.occupied.append(coordinate)
def depth_search_iterative(protein, ch_score):
    """Prepare the chain matrix and fold ``protein`` via ``fold_selector``.

    A square grid of ``" "`` cells with side ``2 * len(amino_string) + 2``
    is appended to ``protein.chain.matrix`` and the first amino is placed at
    index ``len(amino_string) + 1`` on both axes. The remaining aminos are
    then handled in a loop: the last one gets fold 0, otherwise
    ``fold_selector`` is asked for the ideal chain. When it reports one,
    the global ``best_chain`` is passed through ``get_matrix`` and the
    result replaces ``protein.matrix`` and ``protein.chain.chain_list``.

    Args:
        protein: object providing ``amino_string``, ``char_counter``,
            ``chain`` (with ``chain_list`` and ``matrix``) and ``matrix``.
        ch_score: forwarded unchanged to ``fold_selector``.

    Raises:
        RuntimeError: if ``fold_selector`` returns a falsy value, because no
            fold is then available for the amino. (The original code failed
            at this point with an unbound ``fold`` variable.)
    """
    length = len(protein.amino_string)

    # Same size as the original nested loops: range(2 * length + 1 + 1).
    side = 2 * length + 2
    protein.chain.matrix.extend([" "] * side for _ in range(side))

    # Center the first amino's coordinates and put it in the matrix.
    center = length + 1
    protein.chain.chain_list[0].coordinates = [center, center]
    protein.chain.matrix[center][center] = protein.chain.chain_list[0]

    # Index 0 is skipped by starting at protein.char_counter.
    while protein.char_counter < len(protein.amino_string):
        char = protein.amino_string[protein.char_counter]

        # Position the previous amino folded to; -1 is the last list element.
        amino_xy = protein.chain.chain_list[-1].get_fold_coordinates()

        if protein.char_counter + 1 == len(protein.amino_string):
            # Last amino always has a fold of 0.
            fold = 0
        else:
            illegal_folds = None
            ideal_chain = fold_selector(amino_xy, char, protein.chain,
                                        illegal_folds, protein.amino_string,
                                        ch_score)

            # The full chain was found: take it over and stop.
            if ideal_chain:
                protein.matrix, protein.chain.chain_list = get_matrix(
                    best_chain)
                break

            raise RuntimeError(
                "fold_selector did not return a complete chain")

        protein.chain.chain_list.append(Amino(char, fold, amino_xy))

        # Advance the counter the loop condition tests; the original
        # incremented an unrelated local and therefore never terminated.
        protein.char_counter += 1
def build_straight_protein(protein):
    """Lay out ``protein`` as a straight chain in which every fold is 2.

    The first amino is moved to the origin (three zeros in 3D mode, two
    otherwise). One amino with fold 2 is then appended per remaining
    character, each starting at the fold coordinates of the amino before
    it. Finally the chain is passed through ``get_matrix`` and the result
    is stored in ``protein.matrix`` and ``protein.chain.chain_list``.

    Args:
        protein: object providing ``mode_3d``, ``amino_string``,
            ``chain.chain_list`` and ``matrix``.
    """
    origin = [0, 0, 0] if protein.mode_3d else [0, 0]
    protein.chain.chain_list[0].coordinates = origin

    # Character 0 is already represented by the first amino in the chain.
    for index in range(1, len(protein.amino_string)):
        previous_amino = protein.chain.chain_list[index - 1]
        protein.chain.chain_list.append(
            Amino(protein.amino_string[index], 2,
                  previous_amino.get_fold_coordinates()))

    protein.matrix, protein.chain.chain_list = get_matrix(
        protein.chain.chain_list)
def constructive(proteinString):
    """
    Builds a 2D protein, fixes its first two amino acids at [0, 0] and
    [0, 1], lets createFolded place the remaining ones and prints the value
    found in optStability[0].

    Argument:
    proteinString -- a string that contains the amino acids of the protein
    """
    protein = Protein(proteinString, "2D")

    # Pinning the first two amino acids prevents rotational symmetry.
    for position in (0, 1):
        amino = Amino(position, protein.proteinString[position])
        protein.aminoList.append(amino)
        protein.aminoList[position].coordinate = [0, position]
        protein.occupied.append([0, position])

    # Recursively place everything from the third amino acid onwards.
    createFolded(protein, 2)

    print("Protein", proteinString, "has an optimal stability of",
          optStability[0])
def find_best_chain(current_chain, chars, ch_score, current_score):
    """Exhaustive 2D search that keeps the chain matrix and score up to date.

    The first character of ``chars`` is dropped on entry. Each amino that
    is tried is written into ``current_chain.matrix`` and the score is
    extended from ``current_score`` with ``get_score_iterative``. When the
    last amino is placed and the score is lower than the global
    ``best_score``, the score and a deep copy of the chain are stored in
    ``best_score`` / ``best_chain``. All changes to the chain, matrix and
    mirror status are undone before returning.

    Args:
        current_chain: chain object with ``chain_list`` and ``matrix``.
        chars: remaining amino characters, including the one that the caller
            has already placed at index 0.
        ch_score: not used here; only passed on to the recursive call.
        current_score: score of the chain as it is on entry.

    Returns:
        None. Results are reported through the module globals.
    """
    global best_score
    global best_chain

    # Index 0 was already handled by the caller.
    chars = chars[1:]

    if len(chars) == 1:
        # Final amino: fold 0, entered into both the chain and the matrix.
        col, row = current_chain.chain_list[-1].get_fold_coordinates()
        final_amino = Amino(chars[0], 0, [col, row])
        current_chain.chain_list.append(final_amino)
        current_chain.matrix[row][col] = final_amino

        # Build the final score on top of the score so far.
        score = get_score_iterative(current_chain.chain_list,
                                    current_chain.matrix, current_score)

        # Lower is better: remember the chain when it beats the best so far.
        if score < best_score:
            print("New best score: " + str(score))
            best_score = score
            best_chain = copy.deepcopy(current_chain.chain_list)

        # Undo the final amino in the matrix and in the chain.
        current_chain.matrix[row][col] = " "
        del current_chain.chain_list[-1]
        return None

    legal_moves = get_legal_moves_nomirror(
        current_chain.chain_list[-1].get_fold_coordinates(), current_chain)

    # No legal move means the protein got "stuck"; give up on this branch.
    if not legal_moves:
        return None

    for move in legal_moves:
        col, row = current_chain.chain_list[-1].get_fold_coordinates()
        candidate = Amino(chars[0], move, [col, row])
        current_chain.chain_list.append(candidate)

        # Mirror the change in the matrix and the mirror status.
        current_chain.matrix[row][col] = candidate
        current_chain.update_mirror_status()

        branch_score = get_score_iterative(current_chain.chain_list,
                                           current_chain.matrix,
                                           current_score)

        find_best_chain(current_chain, chars, ch_score, branch_score)

        # Backtrack: matrix cell, mirror status and chain.
        current_chain.matrix[row][col] = " "
        current_chain.update_mirror_status_reverse()
        del current_chain.chain_list[-1]
def branch_and_bound_random(protein, ch_score, best_score_import, p1, p2):
    """Set up the shared search state and fold ``protein``.

    Stores ``p1`` / ``p2`` in the globals ``p_below_average`` /
    ``p_above_average`` and ``best_score_import`` in ``best_score``. It
    appends one ``[]`` plus one ``[0, 0, 0]`` entry per character to the
    global ``partial_energies``, builds the 2D or 3D chain matrix with the
    first amino at its center and registers the initial fold spots. The
    remaining aminos are then handled in a loop: the last one gets fold 0,
    otherwise ``fold_selector`` is asked for the ideal chain. When it
    reports one, the global ``best_chain`` is passed through ``get_matrix``
    and the result replaces ``protein.matrix`` and
    ``protein.chain.chain_list``.

    Args:
        protein: object providing ``amino_string``, ``char_counter``,
            ``mode_3d``, ``chain`` and ``matrix``.
        ch_score: forwarded unchanged to ``fold_selector``.
        best_score_import: starting value for the global ``best_score``.
        p1: value stored in the global ``p_below_average``.
        p2: value stored in the global ``p_above_average``.

    Raises:
        RuntimeError: if ``fold_selector`` returns a falsy value, because no
            fold is then available for the amino. (The original code failed
            at this point with an unbound ``fold`` variable.)
    """
    global best_score
    global p_below_average
    global p_above_average
    global partial_energies

    p_below_average = p1
    p_above_average = p2

    # Check if unsupported 3d mode.
    check_dimensions(protein.chain.chain_list)

    # A known score can be imported as the starting bound.
    best_score = best_score_import

    # One partial-energy entry per chain depth, after a leading empty entry.
    # NOTE(review): the global list is appended to, not reset, so entries
    # from an earlier call stay in front.
    partial_energies.append([])
    partial_energies.extend([0, 0, 0] for _ in protein.amino_string)

    length = len(protein.amino_string)

    # Same size as the original nested loops: range(2 * length + 1 + 1).
    side = 2 * length + 2
    center = length + 1

    if protein.mode_3d:
        protein.chain.matrix.extend(
            [[" "] * side for _ in range(side)] for _ in range(side))
        protein.chain.chain_list[0].coordinates = [center, center, center]
        protein.chain.matrix[center][center][center] = (
            protein.chain.chain_list[0])
    else:
        protein.chain.matrix.extend([" "] * side for _ in range(side))
        # Center the first amino's coordinates and put it in the matrix.
        protein.chain.chain_list[0].coordinates = [center, center]
        protein.chain.matrix[center][center] = protein.chain.chain_list[0]

    # Register the fold spots that belong to the initial chain.
    (new_score, spots_to_add, spots_to_remove, spots_to_add_C,
     spots_to_remove_C) = get_score_iterative_and_spots(
         protein.chain, protein.chain.matrix, 0)
    protein.chain.add_fold_spots(spots_to_add, "H")
    protein.chain.remove_fold_spots(spots_to_remove, "H")
    protein.chain.add_fold_spots(spots_to_add_C, "C")
    protein.chain.remove_fold_spots(spots_to_remove_C, "C")

    # Index 0 is skipped by starting at protein.char_counter.
    while protein.char_counter < len(protein.amino_string):
        char = protein.amino_string[protein.char_counter]

        # Position the previous amino folded to; -1 is the last list element.
        amino_xy = protein.chain.chain_list[-1].get_fold_coordinates()

        if protein.char_counter + 1 == len(protein.amino_string):
            # Last amino always has a fold of 0.
            fold = 0
        else:
            illegal_folds = None
            ideal_chain = fold_selector(amino_xy, char, protein.chain,
                                        illegal_folds, protein.amino_string,
                                        ch_score)

            # The full chain was found: take it over and stop.
            if ideal_chain:
                protein.matrix, protein.chain.chain_list = get_matrix(
                    best_chain)
                break

            raise RuntimeError(
                "fold_selector did not return a complete chain")

        protein.chain.chain_list.append(Amino(char, fold, amino_xy))

        # Advance the counter the loop condition tests; the original
        # incremented an unrelated local and therefore never terminated.
        protein.char_counter += 1
def find_best_chain(current_chain, chars, ch_score, current_score):
    """Branch-and-bound search with probabilistic pruning on partial scores.

    The first character of ``chars`` is dropped on entry. Every legal move
    is tried: the amino is placed in the chain and matrix, the score and
    fold spots are updated, and the branch is skipped when either

    * ``new_score`` plus the maximum extra score still possible is not lower
      than the global ``best_score``, or
    * the partial score is worse than the best partial score recorded for
      that depth in the global ``partial_energies`` and a random draw
      exceeds ``p_below_average`` (score at or below the recorded average)
      or ``p_above_average`` (score above it).

    When the last amino is placed and the score is lower than
    ``best_score``, the score and a deep copy of the chain are stored in the
    globals ``best_score`` / ``best_chain``. All changes to the chain,
    matrix, fold spots, odd/even flag and mirror status are undone before
    returning.

    Args:
        current_chain: chain object with ``chain_list``, ``matrix``, ``odd``
            and the fold-spot bookkeeping methods used below.
        chars: remaining amino characters, including the one that the caller
            has already placed at index 0.
        ch_score: not used here; only passed on to the recursive call.
        current_score: score of the chain as it is on entry.

    Returns:
        None. Results are reported through the module globals.
    """
    global best_score
    global best_chain
    global partial_energies
    global p_below_average
    global p_above_average

    # Index 0 was already handled by the caller.
    chars = chars[1:]
    mode_3d = is_chain_3d(current_chain.chain_list)

    # Only one character left: this is the end of a chain.
    if len(chars) == 1:
        last_amino = current_chain.chain_list[-1]
        coordinates = last_amino.get_fold_coordinates()
        new_amino = Amino(chars[0], 0, coordinates)
        current_chain.chain_list.append(new_amino)

        # The matrix is indexed [z][y][x] in 3D and [y][x] in 2D.
        if mode_3d:
            current_chain.matrix[coordinates[2]][coordinates[1]][
                coordinates[0]] = new_amino
        else:
            current_chain.matrix[coordinates[1]][coordinates[0]] = new_amino

        score = get_score_iterative(current_chain.chain_list,
                                    current_chain.matrix, current_score)

        # Lower is better: remember the chain when it beats the best so far.
        if score < best_score:
            print("New best score: " + str(score))
            best_score = score
            best_chain = copy.deepcopy(current_chain.chain_list)

        # Undo the final amino in the matrix and in the chain.
        if mode_3d:
            current_chain.matrix[coordinates[2]][coordinates[1]][
                coordinates[0]] = " "
        else:
            current_chain.matrix[coordinates[1]][coordinates[0]] = " "
        del current_chain.chain_list[-1]
        return None

    legal_moves = get_legal_moves_nomirror(
        current_chain.chain_list[-1].get_fold_coordinates(), current_chain)

    # No legal move means the protein got "stuck"; give up on this branch.
    if not legal_moves:
        return None

    for move in legal_moves:
        last_amino = current_chain.chain_list[-1]
        coordinates = last_amino.get_fold_coordinates()
        new_amino = Amino(chars[0], move, coordinates)
        current_chain.chain_list.append(new_amino)
        skip_function = False

        # Also add that amino to the matrix, and update the mirror status.
        if mode_3d:
            current_chain.matrix[coordinates[2]][coordinates[1]][
                coordinates[0]] = new_amino
        else:
            current_chain.matrix[coordinates[1]][coordinates[0]] = new_amino
        current_chain.update_mirror_status()

        # Calculate the new score and the fold spots to add/remove.
        (new_score, spots_to_add, spots_to_remove, spots_to_add_C,
         spots_to_remove_C) = get_score_iterative_and_spots(
             current_chain, current_chain.matrix, current_score)

        # Remove the spots that are now filled by aminos.
        current_chain.remove_fold_spots(spots_to_remove, "H")
        current_chain.remove_fold_spots(spots_to_remove_C, "C")

        # Change odd/even.
        current_chain.odd = not current_chain.odd

        # Add the spots that were newly created.
        current_chain.add_fold_spots(spots_to_add, "H")
        current_chain.add_fold_spots(spots_to_add_C, "C")

        # Upper bound on what the remaining characters can still add; the
        # call also returns the spots it removed so they can be restored.
        (extra_score_possible, removed_even, removed_odd, removed_even_C,
         removed_odd_C) = current_chain.get_max_possible_extra_score(
             chars[1:])
        max_possible = new_score + extra_score_possible

        # If a new best score can't be reached, abandon this branch.
        if max_possible >= best_score:
            skip_function = True

        current_depth = len(current_chain.chain_list)
        depth_stats = partial_energies[current_depth]

        if new_score <= depth_stats[0]:
            # Best (or equal best) partial score for this depth: always
            # continue and fold it into the running average.
            if new_score < depth_stats[0]:
                depth_stats[0] = new_score
            depth_stats[1] = calculate_average(
                depth_stats[1], depth_stats[2], new_score)
            depth_stats[2] += 1
        elif new_score <= depth_stats[1]:
            # At or below the average (so better) for that depth.
            random_number = random.uniform(0, 1)
            if random_number > p_below_average:
                skip_function = True
            else:
                depth_stats[1] = calculate_average(
                    depth_stats[1], depth_stats[2], new_score)
                depth_stats[2] += 1
        else:
            # Above the average (so worse) for that depth.
            random_number = random.uniform(0, 1)
            if random_number > p_above_average:
                skip_function = True
            else:
                depth_stats[1] = calculate_average(
                    depth_stats[1], depth_stats[2], new_score)
                depth_stats[2] += 1

        if not skip_function:
            # The actual recursive step.
            find_best_chain(current_chain, chars, ch_score, new_score)

        # Undo every fold-spot change made before the recursive call.
        current_chain.add_back_even(removed_even, "H")
        current_chain.add_back_odd(removed_odd, "H")
        current_chain.add_back_even(removed_even_C, "C")
        current_chain.add_back_odd(removed_odd_C, "C")

        current_chain.remove_fold_spots(spots_to_add, "H")
        current_chain.remove_fold_spots(spots_to_add_C, "C")

        # Change odd/even back.
        current_chain.odd = not current_chain.odd

        # Restore the spots that were removed for this amino.
        current_chain.add_fold_spots(spots_to_remove, "H")
        current_chain.add_fold_spots(spots_to_remove_C, "C")

        # Clear the matrix cell again. The 3D branch used to be a bare
        # expression without an assignment, so the cell stayed occupied
        # after backtracking.
        if mode_3d:
            current_chain.matrix[coordinates[2]][coordinates[1]][
                coordinates[0]] = " "
        else:
            current_chain.matrix[coordinates[1]][coordinates[0]] = " "
        current_chain.update_mirror_status_reverse()

        # Undo the added amino.
        del current_chain.chain_list[-1]
def branch_and_bound_lookahead(protein, ch_score, best_score_import,
                               max_lookahead):
    """Fold ``protein`` amino by amino using a bounded lookahead.

    Builds the 2D or 3D chain matrix with the first amino at its center and
    registers the initial fold spots. For every remaining character it asks
    ``fold_selector`` for the next fold, places the amino in the chain and
    matrix, and updates the running score and fold spots. When
    ``fold_selector`` reports that the ideal chain was found, the global
    ``best_chain`` is printed, passed through ``get_matrix`` and the loop
    stops. After the loop the chain is passed through ``get_matrix`` again.

    Args:
        protein: object providing ``amino_string``, ``char_counter``,
            ``chain`` and ``matrix``.
        ch_score: forwarded unchanged to ``fold_selector``.
        best_score_import: starting value for the global ``best_score``.
        max_lookahead: forwarded unchanged to ``fold_selector``.
    """
    global best_score
    global best_chain
    # Declared global so the reset in the loop reaches the module-level
    # counter; the original assignment only created an unused local.
    global current_lookahead

    best_score = best_score_import

    # The original passed the undefined name ``chain`` here.
    mode_3d = is_chain_3d(protein.chain.chain_list)

    length = len(protein.amino_string)

    # Same size as the original nested loops: range(2 * length + 1 + 1).
    side = 2 * length + 2
    center = length + 1

    if mode_3d:
        # The layers belong in protein.chain.matrix, which is what is indexed
        # below; the original appended them to protein.matrix instead.
        protein.chain.matrix.extend(
            [[" "] * side for _ in range(side)] for _ in range(side))
        protein.chain.chain_list[0].coordinates = [center, center, center]
        protein.chain.matrix[center][center][center] = (
            protein.chain.chain_list[0])
    else:
        protein.chain.matrix.extend([" "] * side for _ in range(side))
        # Center the first amino's coordinates and put it in the matrix.
        protein.chain.chain_list[0].coordinates = [center, center]
        protein.chain.matrix[center][center] = protein.chain.chain_list[0]

    (new_score, spots_to_add, spots_to_remove, spots_to_add_C,
     spots_to_remove_C) = get_score_iterative_and_spots(
         protein.chain, protein.chain.matrix, 0)
    protein.chain.add_fold_spots(spots_to_add, "H")
    protein.chain.add_fold_spots(spots_to_add_C, "C")

    current_score = 0

    # Index 0 is skipped by starting at protein.char_counter.
    while protein.char_counter < len(protein.amino_string):
        char = protein.amino_string[protein.char_counter]

        # Position the previous amino folded to; -1 is the last list element.
        amino_xy = protein.chain.chain_list[-1].get_fold_coordinates()

        if protein.char_counter + 1 == len(protein.amino_string):
            # Last amino always has a fold of 0.
            fold = 0
        else:
            ideal_chain, fold = fold_selector(
                amino_xy, char, protein.chain,
                protein.amino_string[protein.char_counter - 1:], ch_score,
                max_lookahead, current_score)

            # The full chain was found: take it over and stop.
            if ideal_chain:
                for amino in best_chain:
                    print(amino)
                protein.matrix, protein.chain.chain_list = get_matrix(
                    best_chain)
                break

        new_amino = Amino(char, fold, amino_xy)

        # Add the amino to the chain and update the mirror status.
        protein.chain.chain_list.append(new_amino)
        protein.chain.update_mirror_status()

        # NOTE(review): indexed [z][y][x] / [y][x] to agree with the
        # find_best_chain implementations in this file; the original wrote
        # to [x][y][z] / [x][y] here. Confirm against the scoring helpers.
        if mode_3d:
            protein.chain.matrix[amino_xy[2]][amino_xy[1]][
                amino_xy[0]] = new_amino
        else:
            protein.chain.matrix[amino_xy[1]][amino_xy[0]] = new_amino

        # Calculate the new score and the fold spots to add/remove.
        (new_score, spots_to_add, spots_to_remove, spots_to_add_C,
         spots_to_remove_C) = get_score_iterative_and_spots(
             protein.chain, protein.chain.matrix, current_score)
        current_score = new_score

        # Remove the spots that are now filled by aminos.
        protein.chain.remove_fold_spots(spots_to_remove, "H")
        protein.chain.remove_fold_spots(spots_to_remove_C, "C")

        # Change odd/even.
        protein.chain.odd = not protein.chain.odd

        # Add the spots that were newly created, including the position the
        # new amino folds to when it is of type H or C.
        if protein.chain.chain_list[-1].atype == "H":
            spots_to_add.append(
                protein.chain.chain_list[-1].get_fold_coordinates())
        if protein.chain.chain_list[-1].atype == "C":
            spots_to_add_C.append(
                protein.chain.chain_list[-1].get_fold_coordinates())
        protein.chain.add_fold_spots(spots_to_add, "H")
        protein.chain.add_fold_spots(spots_to_add_C, "C")

        # Return value is not used here; the call is kept as in the original.
        protein.chain.get_max_possible_extra_score(
            protein.amino_string[protein.char_counter:])

        protein.char_counter += 1

        for amino in protein.chain.chain_list:
            print(amino, end='')
        print()

        # Start the next lookahead round from a clean slate.
        best_chain = []
        best_score = 1
        current_lookahead = 0

        print(protein.chain.available_bonds_odd_H)
        print(protein.chain.available_bonds_odd_C)
        print(protein.chain.available_bonds_even_H)
        print(protein.chain.available_bonds_even_C)

    protein.matrix, protein.chain.chain_list = get_matrix(
        protein.chain.chain_list)
def breadth_search(protein, ch_score):
    """Enumerate every fold breadth-first and keep the best-scoring chain.

    Chains are taken from a FIFO queue. A chain that is one amino short of
    the full length gets its last amino with fold 0 and is stored as
    finished; any other chain is deep-copied once per legal move and the
    copies are queued. All finished chains are scored with
    ``get_score_efficient``; the first chain with the lowest score is passed
    through ``get_matrix`` and the result is stored in ``protein.matrix``
    and ``protein.chain.chain_list``.

    Args:
        protein: object providing ``amino_string``, ``chain`` and ``matrix``.
        ch_score: forwarded unchanged to ``get_score_efficient``.
    """
    # Check if unsupported 3d mode.
    check_dimensions(protein.chain.chain_list)

    # The starting chain already contains the first amino.
    pending = Queue(maxsize=0)
    pending.put(protein.chain)

    finished_chains = []
    target_length = len(protein.amino_string)

    while not pending.empty():
        chain_actual = pending.get()

        # The chain length is the index of the next character to place.
        index = len(chain_actual.chain_list)

        if index + 1 == target_length:
            # Last amino always has a fold of 0.
            atype = protein.amino_string[index]
            coordinates = chain_actual.chain_list[-1].get_fold_coordinates()
            chain_actual.chain_list.append(Amino(atype, 0, coordinates))
            finished_chains.append(chain_actual)
            continue

        legal_moves = get_legal_moves(
            chain_actual.chain_list[-1].get_fold_coordinates(),
            chain_actual.chain_list)

        # Without legal moves the chain ends here.
        if not legal_moves:
            continue

        # One new chain per legal move.
        for move in legal_moves:
            atype = protein.amino_string[index]
            coordinates = chain_actual.chain_list[-1].get_fold_coordinates()
            amino = Amino(atype, move, coordinates)

            branched_chain = copy.deepcopy(chain_actual)
            branched_chain.chain_list.append(amino)
            pending.put(branched_chain)

    # Lowest score wins; chains with an equal score are collected as well.
    best_score = 1
    best_chains = []

    for chain in finished_chains:
        matrix, xy_offset = get_matrix_efficient(chain.chain_list)
        score = get_score_efficient(chain.chain_list, matrix, xy_offset,
                                    ch_score)

        if score < best_score:
            best_score = score
            best_chains = []
            print("New best score: " + str(score))
            best_chains.append(chain)
        elif score == best_score:
            best_chains.append(chain)

    protein.matrix, protein.chain.chain_list = get_matrix(
        best_chains[0].chain_list)
def beam_search(protein, ch_score):
    """Breadth-first search that drops chains scoring worse than average.

    Chains are taken from a FIFO queue. When the chain length moves on to
    the next level (one more than the global ``global_index``), the global
    ``avg_scores`` is set to the mean of the scores collected for that
    level. A chain whose ``score`` is greater than ``avg_scores`` is
    discarded. A chain one amino short of the full length gets its last
    amino with fold 0 and is stored as finished; any other chain is
    deep-copied once per legal move, scored, and queued. The first finished
    chain with the lowest score is passed through ``get_matrix`` and the
    result is stored in ``protein.matrix`` and ``protein.chain.chain_list``.

    The globals ``global_index`` and ``avg_scores`` are read and updated but
    not reset here.

    Args:
        protein: object providing ``amino_string``, ``chain`` and ``matrix``.
        ch_score: forwarded unchanged to ``get_score_efficient``, both for
            pruning and for the final ranking.
    """
    global global_index
    global avg_scores

    # Check if unsupported 3d mode.
    check_dimensions(protein.chain.chain_list)

    # The starting chain already contains the first amino.
    queue = Queue(maxsize=0)
    queue.put(protein.chain)

    finished_chains = []

    # Scores of the chains created for the level currently being filled.
    scores = []

    while not queue.empty():
        chain_actual = queue.get()

        # The chain length is the index of the next character to place.
        index = len(chain_actual.chain_list)

        # A longer chain than before means a new level has started.
        if index == global_index + 1:
            global_index = index

            # Keep the previous cutoff when no scores were collected; the
            # original divided by zero in that case.
            if scores:
                avg_scores = sum(scores) / len(scores)
            scores = []

        # Drop the chain if its score is worse than the cutoff score.
        if chain_actual.score > avg_scores:
            continue

        if index + 1 == len(protein.amino_string):
            # Last amino always has a fold of 0.
            atype = protein.amino_string[index]
            coordinates = chain_actual.chain_list[-1].get_fold_coordinates()
            chain_actual.chain_list.append(Amino(atype, 0, coordinates))
            finished_chains.append(chain_actual)
            continue

        legal_moves = get_legal_moves(
            chain_actual.chain_list[-1].get_fold_coordinates(),
            chain_actual.chain_list)

        # Without legal moves the chain ends here.
        if not legal_moves:
            continue

        # One new chain per legal move.
        for move in legal_moves:
            atype = protein.amino_string[index]
            coordinates = chain_actual.chain_list[-1].get_fold_coordinates()
            amino = Amino(atype, move, coordinates)

            new_chain = copy.deepcopy(chain_actual)
            new_chain.chain_list.append(amino)

            # Score the new chain with the caller's ch_score so pruning and
            # the final ranking agree; the original hard-coded 1 here.
            matrix, offset = get_matrix_efficient(new_chain.chain_list)
            score = get_score_efficient(new_chain.chain_list, matrix, offset,
                                        ch_score)
            new_chain.score = score
            queue.put(new_chain)

            # Track the score for this level's average.
            scores.append(score)

    # Lowest score wins; chains with an equal score are collected as well.
    best_score = 1
    best_chains = []

    # The original also built an unused Protein and ran get_matrix on a deep
    # copy of every finished chain; that dead work has been removed.
    for chain in finished_chains:
        matrix, xy_offset = get_matrix_efficient(chain.chain_list)
        score = get_score_efficient(chain.chain_list, matrix, xy_offset,
                                    ch_score)

        if score < best_score:
            best_score = score
            best_chains = []
            print("New best score: " + str(score))
            best_chains.append(chain)
        elif score == best_score:
            best_chains.append(chain)

    protein.matrix, protein.chain.chain_list = get_matrix(
        best_chains[0].chain_list)
def createFolded(protein, idToMove):
    """
    Recursively place amino acids to create all possible ways to fold a
    protein.

    Mirror images are prevented by using the fact that when the sum of the
    x-coordinates is zero and the next amino acid has three possible options,
    two of those options will be mirror images, so one of those is removed.

    If the protein is not folded at least twice consecutively in the same
    direction, no amino acids lie next to each other. In that case the sum of
    the absolute x- and y-coordinate totals equals two times the protein
    length, and the stability is not checked.

    The best (lowest) stability found is stored in the module-level list
    `optStability` at index 0.

    Arguments:
    protein -- object of class Protein
    idToMove -- positive integer, id of the amino acid that will be moved
    """

    # Stop this function if idToMove exceeds the length of the protein
    if idToMove > (protein.proteinLength - 1):
        return

    # Discard everything placed at or after this position by earlier branches
    del protein.aminoList[idToMove:]
    del protein.occupied[idToMove:]

    # Get the coordinate of the previous amino acid
    prevCo = protein.aminoList[(idToMove - 1)].coordinate

    # Get the unoccupied surrounding coordinates of the previous amino acid
    possibleCos = protein.getSurroundCo(prevCo, occupied=False)

    protein.aminoList.append(Amino(idToMove, protein.proteinString[idToMove]))

    xTotal = sum(xCo[0] for xCo in protein.occupied)
    if xTotal == 0 and len(possibleCos) == 3:
        # Drop the option at index 1 to prevent mirror images
        possibleCos.pop(1)

    for possibleCo in possibleCos:
        protein.aminoList[idToMove].coordinate = possibleCo

        # The slot does not exist yet on the first option (it was deleted
        # above), so fall back to appending it.
        try:
            protein.occupied[idToMove] = possibleCo
        except IndexError:
            protein.occupied.append(possibleCo)

        xTotal = abs(sum(xCo[0] for xCo in protein.occupied))
        yTotal = abs(sum(yCo[1] for yCo in protein.occupied))
        total = xTotal + yTotal

        # Check if all the amino acids have been placed
        if (idToMove == (protein.proteinLength - 1)
                and total != protein.proteinLength * 2):
            getStability(protein)
            if protein.stability < optStability[0]:
                optStability[0] = protein.stability

        # Place the next amino acid
        createFolded(protein, (idToMove + 1))
def _set_chain_matrix_cell(matrix, coordinates, value, mode_3d):
    """Store `value` in `matrix` at `coordinates`, indexed [z][y][x] in 3D and [y][x] in 2D."""
    if mode_3d:
        matrix[coordinates[2]][coordinates[1]][coordinates[0]] = value
    else:
        matrix[coordinates[1]][coordinates[0]] = value


def find_best_chain(current_chain, chars, ch_score, current_score, max_lookahead):
    """Depth-first search over all folds of `chars`, pruning hopeless branches.

    Each call drops the first char (already placed by the caller) and tries
    every legal move for the next one. The chain, its matrix, its mirror
    status and its fold-spot bookkeeping are modified in place and restored
    before the call returns. A branch is abandoned when its score plus the
    maximum extra score still obtainable cannot beat the global `best_score`
    (lower is better).

    The result is delivered through the module-level globals `best_score` and
    `best_chain`; `current_lookahead` tracks the recursion depth and is
    compared against `max_lookahead`.

    Arguments:
    current_chain -- chain object being extended in place
    chars -- remaining amino characters; the first one is already placed
    ch_score -- not used here, only passed on to the recursive call
    current_score -- score of `current_chain` before this call adds an amino
    max_lookahead -- depth at which the search stops and scores the chain

    Returns None.
    """
    global best_score
    global best_chain
    global current_lookahead

    # The first char has to be dropped because it was processed by the caller.
    # Note: this is also valid on the first call because the first char is
    # built before the search starts.
    chars = chars[1:]

    mode_3d = is_chain_3d(current_chain)

    # If there is only 1 char left, or the lookahead limit is reached, we've
    # arrived at the end of a chain.
    if len(chars) == 1 or current_lookahead == max_lookahead:
        # Add the last char to the amino chain AND the chain matrix.
        coordinates = current_chain.chain_list[-1].get_fold_coordinates()
        new_amino = Amino(chars[0], 0, coordinates)
        current_chain.chain_list.append(new_amino)
        _set_chain_matrix_cell(current_chain.matrix, coordinates, new_amino, mode_3d)

        score = get_score_iterative(current_chain.chain_list, current_chain.matrix, current_score)

        # If this score is the best score, save score + chain as globals.
        if score < best_score:
            print("New best score: " + str(score))
            best_score = score
            best_chain = copy.deepcopy(current_chain.chain_list)
            for amino in current_chain.chain_list:
                print(amino, end='')
            print()

        # Remove the amino from the matrix and the chain again.
        _set_chain_matrix_cell(current_chain.matrix, coordinates, " ", mode_3d)
        del current_chain.chain_list[-1]
        return None

    # Get legal moves on the position of the last amino.
    legal_moves = get_legal_moves_nomirror(current_chain.chain_list[-1].get_fold_coordinates(), current_chain)

    # If no legal moves are left, abort the chain. The protein got "stuck".
    if not legal_moves:
        return None

    # Go recursively through all legal moves and their child legal moves.
    for move in legal_moves:
        # Place the next amino on the chain and in the matrix.
        coordinates = current_chain.chain_list[-1].get_fold_coordinates()
        new_amino = Amino(chars[0], move, coordinates)
        current_chain.chain_list.append(new_amino)
        current_lookahead += 1
        _set_chain_matrix_cell(current_chain.matrix, coordinates, new_amino, mode_3d)
        current_chain.update_mirror_status()

        # Calculate the new score and the fold spots to add/remove.
        new_score, spots_to_add, spots_to_remove, spots_to_add_C, spots_to_remove_C = get_score_iterative_and_spots(current_chain, current_chain.matrix, current_score)

        # Remove the spots that are now filled by aminos.
        try:
            current_chain.remove_fold_spots(spots_to_remove, "H")
        except Exception:
            # Dump the bookkeeping state for debugging, then let the original
            # error propagate.
            print(spots_to_remove)
            print(current_chain.available_bonds_odd_H)
            print(current_chain.available_bonds_odd_C)
            print(current_chain.available_bonds_even_H)
            print(current_chain.available_bonds_even_C)
            for amino in current_chain.chain_list:
                print(amino, end="")
                print(amino.coordinates)
            raise
        current_chain.remove_fold_spots(spots_to_remove_C, "C")

        # Change odd/even.
        current_chain.odd = not current_chain.odd

        # Add the spots that were newly created.
        current_chain.add_fold_spots(spots_to_add, "H")
        current_chain.add_fold_spots(spots_to_add_C, "C")

        # Calculate max extra score and prune spots that are too far away.
        extra_score_possible, removed_even, removed_odd, removed_even_C, removed_odd_C = current_chain.get_max_possible_extra_score(chars[1:])
        max_possible = new_score + extra_score_possible

        # Only descend when a new best score can still be reached.
        if max_possible < best_score:
            find_best_chain(current_chain, chars, ch_score, new_score, max_lookahead)

        # Undo every change made for this move, whether or not we recursed.
        current_chain.add_back_even(removed_even, "H")
        current_chain.add_back_odd(removed_odd, "H")
        current_chain.add_back_even(removed_even_C, "C")
        current_chain.add_back_odd(removed_odd_C, "C")
        current_chain.remove_fold_spots(spots_to_add, "H")
        current_chain.remove_fold_spots(spots_to_add_C, "C")

        # Change odd/even back.
        current_chain.odd = not current_chain.odd

        # Restore the fold spots that were removed.
        current_chain.add_fold_spots(spots_to_remove, "H")
        current_chain.add_fold_spots(spots_to_remove_C, "C")

        # Clear the matrix cell (2D and 3D alike) and reverse the mirror status.
        _set_chain_matrix_cell(current_chain.matrix, coordinates, " ", mode_3d)
        current_chain.update_mirror_status_reverse()

        # The lookahead returns to its previous value on every path, and the
        # amino we just added is removed again.
        current_lookahead -= 1
        del current_chain.chain_list[-1]