def internal_mwis(stems_ib):
    """
    Function: internal_mwis()

    Purpose:  Walk over the stems in endpoint order. Whenever the right end
              of a stem is reached, that stem is treated as an outer stem and
              a maximum weight independent set (MWIS) calculation with
              confidence weights is run on the stems nested inside it.
              The outcome is recorded as: Outer stem -> [Inner stems]

    Input:    A dictionary of stems. A key supplies the first two fields of
              a candidate, values[0] and values[1] supply the remaining ones
              (values[1] is stored twice; the last copy is the running
              weight that gets updated).

    Return:   A dictionary of secondary structure information, keyed by
              (key[0], key[1], values[0]); the value is the list of inner
              stems picked by the MWIS, or [] when nothing is nested.
    """
    candidate_list = sorted(
        (key[0], key[1], data[0], data[1], data[1])
        for key, data in stems_ib.items()
    )

    structures_dic = {}
    for endpoint in functions.create_sorted_endpointlist(candidate_list):
        # Only right endpoints trigger a calculation
        if endpoint[1] != 'r':
            continue

        position = endpoint[3] - 1
        outer = candidate_list[position]
        outer_key = (outer[0], outer[1], outer[2])

        inner = find_nested(outer, candidate_list)
        if not inner:
            structures_dic[outer_key] = []
            continue

        # MWIS on the set of inner stems
        inner_endpoints = functions.create_sorted_endpointlist(inner)
        chosen = functions.MWIS(inner, inner_endpoints)

        # The outer stem's weight becomes its own weight plus the weights
        # of the chosen inner stems, so enclosing stems see the total.
        total = outer[4] + sum(member[4] for member in chosen)
        candidate_list[position] = (outer[0], outer[1], outer[2], outer[3], total)

        structures_dic[outer_key] = chosen

    return structures_dic
def internal_mwis(stems_ib):
    """
    Function: internal_mwis()

    Purpose:  Scan the stems by sorted endpoints. Each time a right endpoint
              is met, the stem it belongs to is taken as outer stem and a
              maximum weight independent set (MWIS) calculation with
              confidence weights is performed on its nested stems.
              The result is kept in a dictionary: Outer stem -> [Inner stems]

    Input:    A dictionary of stems (key -> values); candidates are built
              as (key[0], key[1], values[0], values[1], values[1]).

    Return:   A dictionary of secondary structure information. Keys are
              (key[0], key[1], values[0]); values are the inner stems
              selected by the MWIS (an empty list if there are none).
    """
    candidate_list = [
        (stem[0], stem[1], info[0], info[1], info[1])
        for stem, info in stems_ib.items()
    ]
    candidate_list.sort()

    structures_dic = {}
    all_endpoints = functions.create_sorted_endpointlist(candidate_list)
    right_endpoints = (point for point in all_endpoints if point[1] == 'r')

    for point in right_endpoints:
        slot = point[3] - 1
        interval = candidate_list[slot]
        start, end, length = interval[0], interval[1], interval[2]

        nested = find_nested(interval, candidate_list)
        if nested:
            # MWIS on the set of inner stems
            nested_endpoints = functions.create_sorted_endpointlist(nested)
            selection = functions.MWIS(nested, nested_endpoints)
            # Confidence sum, written back so that enclosing stems use it
            confidence = interval[4] + sum(stem[4] for stem in selection)
            candidate_list[slot] = (start, end, length, interval[3], confidence)
        else:
            selection = []

        # Store inner structure elements in dictionary
        structures_dic[(start, end, length)] = selection

    return structures_dic
def method(stem_dic_mwis, pk_recursive_dic, bulges_internal, multiloops, best_khps):
    """
    Function: method()

    Purpose:  Maximum weight independent set (MWIS) calculation using the set
              of secondary structure elements, pseudoknots and kissing
              hairpins. Hairpin loops may contain inner structure elements.

    Input:    Dictionaries of structure elements.
              stem_dic_mwis    - hairpin stems; only entries whose values[3]
                                 is below 0.0 become candidates ("hp").
              pk_recursive_dic - pseudoknots ("pk"); fields 0..8 of the key
                                 are read, the energy is value[0].
              bulges_internal  - bulge/internal loops ("ib"), energy values[2].
              multiloops       - multiloops ("ml"), energy values[2].
              best_khps        - kissing hairpins ("khp"), energy values[1].
              Every candidate weight is -1 * round(energy, 2).

    Return:   Structure elements in the MWIS, as the tuple
              (mwis_dic, crossing_structures, secondary_structures):
              mwis_dic             - (start, end, values[0]) of a hairpin ->
                                     MWIS result of its nested elements.
              crossing_structures  - selected "khp"/"pk" elements -> weight.
              secondary_structures - selected "hp"/"ib"/"ml" elements -> weight.
              All three are empty when there are no candidates.
    """
    crossing_structures, secondary_structures, mwis_dic = {}, {}, {}
    candidate_list = []

    # Hairpin loops: only stems with negative free energy are considered
    for stem, values in stem_dic_mwis.items():
        if values[3] < 0.0:
            candidate_list.append((stem[0], stem[1], values[0], values[1],
                                   -1 * round(values[3], 2), "hp"))

    # Pseudoknots carry their remaining key fields behind the type tag
    for pk_stem, pk_energy in pk_recursive_dic.items():
        candidate_list.append((pk_stem[0], pk_stem[1], pk_stem[4], pk_stem[7],
                               -1 * round(pk_energy[0], 2), "pk",
                               pk_stem[2], pk_stem[3], pk_stem[4], pk_stem[5],
                               pk_stem[6], pk_stem[7], pk_stem[8]))

    # Bulge/internal loops and multiloops share the same value layout
    for source, tag in ((bulges_internal, "ib"), (multiloops, "ml")):
        for stem, values in source.items():
            candidate_list.append((stem[0], stem[1], values[0], values[1],
                                   -1 * round(values[2], 2), tag))

    # Kissing hairpins
    for stem, values in best_khps.items():
        candidate_list.append((stem[0], stem[1], values[1], 0.0,
                               -1 * round(values[1], 2), "khp"))

    if not candidate_list:
        return mwis_dic, crossing_structures, secondary_structures

    candidate_list.sort()
    sorted_endpoint_list = functions.create_sorted_endpointlist(candidate_list)

    for endpoint in sorted_endpoint_list:  # Scan sorted endpoints list
        if endpoint[1] != 'r':  # Only right endpoints are of interest
            continue
        position = endpoint[3] - 1
        outer_interval = candidate_list[position]
        if outer_interval[5] != 'hp':
            continue

        # NOTE(review): find_nested is defined elsewhere; the flag it
        # returns is assumed to be a real boolean - confirm.
        nested, only_hp_ib_ml = find_nested(outer_interval, candidate_list)
        if not nested or only_hp_ib_ml:
            continue

        # MWIS on the set of nested structure elements
        endpoint_list_recursive = functions.create_sorted_endpointlist(nested)
        result = functions.MWIS(nested, endpoint_list_recursive)

        # Free energy sum. Accumulated one by one on purpose: the built-in
        # sum() may round floats differently on newer interpreters.
        energy = outer_interval[4]
        for element in result:
            energy = energy + element[4]

        # Store updated free energy for outer stem
        candidate_list[position] = (outer_interval[0], outer_interval[1],
                                    outer_interval[2], outer_interval[3],
                                    energy, outer_interval[5])
        # Store inner structure elements in dictionary
        stem = outer_interval[0], outer_interval[1], outer_interval[2]
        mwis_dic[stem] = result

    # Main MWIS calculation
    sorted_endpoint_list_recursive = functions.create_sorted_endpointlist(candidate_list)
    result = functions.MWIS(candidate_list, sorted_endpoint_list_recursive)

    # Search for detected pseudoknots and kissing hairpins
    for element in result:
        kind = element[5]
        if kind in ('khp', 'pk'):
            crossing_structures[element] = element[4]
        if kind in ('hp', 'ib', 'ml'):
            secondary_structures[element] = element[4]
        if kind == 'hp':
            # Hairpin loop can have nested elements
            crossing_structures, secondary_structures = print_recursive(
                element, mwis_dic, crossing_structures, secondary_structures)

    return mwis_dic, crossing_structures, secondary_structures
def _embed_element(pk_structure, element, i, stem_dic, bulges_internal, multiloops, trim):
    """
    Draw one structure element into pk_structure and return the new list.

    element is (start, end, ..., type) with the type tag at index 4.
    trim is 0 for elements of the MWIS (drawn in full) and 1 for
    overlapping elements, which are drawn without their outermost
    position on either side.
    """
    start = element[0] - i + trim
    end = element[1] - i - trim
    kind = element[4]
    key = (element[0], element[1])

    if kind == 'hp':
        # Full stem: stem length + 1, so the loop below runs once per base
        # pair; trimmed stem: one iteration fewer.
        element_length = stem_dic[key][0] + 1 - trim
        for _ in range(1, element_length):
            pk_structure = pk_structure[0:start] + list('(') + pk_structure[start + 1:]
            pk_structure = pk_structure[0:end] + list(')') + pk_structure[end + 1:]
            start = start + 1
            end = end - 1
    elif kind in ('ib', 'ml'):
        source = bulges_internal if kind == 'ib' else multiloops
        structure = source[key][1].replace(':', '')  # Cut off dangling ends
        if trim:
            structure = structure[1:-1]
        pk_structure = pk_structure[0:start] + list(structure) + pk_structure[end + 1:]

    return pk_structure


def add_recursive_elements(i, recursive_loop, pk_structure, stem_dic, bulges_internal, multiloops):
    """
    Function: add_recursive_elements()

    Purpose:  For a core pseudoknot, add recursive secondary structure
              elements for a given loop L1, L2 or L3.

    Input:    Pseudoknot and elements for a loop.
              i               - offset subtracted from element positions to
                                obtain indices into pk_structure.
              recursive_loop  - elements (start, end, energy, ..., type) with
                                the type tag ('hp', 'ib' or 'ml') at index 4;
                                may be empty.
              pk_structure    - structure to draw into; it is combined with
                                lists, so a list of characters is expected.
              stem_dic        - (start, end) -> values, values[0] read as
                                stem length for 'hp' elements.
              bulges_internal - (start, end) -> values, values[1] read as
                                structure string for 'ib' elements.
              multiloops      - (start, end) -> values, values[1] read as
                                structure string for 'ml' elements.

    Return:   Pseudoknots in dot-bracket notation. pk_structure itself is
              returned unchanged when recursive_loop is empty.
    """
    if not recursive_loop:
        return pk_structure

    # Calculate MWIS to avoid overlapping base pairs.
    # Weights need to be positive
    candidate_list = [
        (item[0], item[1], -1.0 * item[2], -1.0 * item[2], -1.0 * item[2], item[4])
        for item in recursive_loop
    ]
    candidate_list.sort()
    sorted_endpoint_list = functions.create_sorted_endpointlist(candidate_list)
    result = functions.MWIS(candidate_list, sorted_endpoint_list)

    # Convert the winners back to the sign and layout used by recursive_loop
    mwis_set = [
        (item[0], item[1], -1.0 * item[2], -1.0 * item[2], item[5])
        for item in result
    ]
    overlapping_set = [item for item in recursive_loop if item not in mwis_set]

    for element in mwis_set:
        pk_structure = _embed_element(pk_structure, element, i, stem_dic,
                                      bulges_internal, multiloops, 0)
    for element in overlapping_set:
        pk_structure = _embed_element(pk_structure, element, i, stem_dic,
                                      bulges_internal, multiloops, 1)

    return pk_structure
def method(stem_dic_mwis, pk_recursive_dic, bulges_internal, multiloops, best_khps):
    """
    Function: method()

    Purpose:  Maximum weight independent set (MWIS) calculation using the set
              of secondary structure elements, pseudoknots and kissing
              hairpins. Hairpin loops may contain inner structure elements.

    Input:    Dictionaries of structure elements.
              stem_dic_mwis    - hairpin stems; only entries whose values[3]
                                 is below 0.0 become candidates ("hp").
              pk_recursive_dic - pseudoknots ("pk"); fields 0..8 of the key
                                 are read, the energy is value[0].
              bulges_internal  - bulge/internal loops ("ib"), energy values[2].
              multiloops       - multiloops ("ml"), energy values[2].
              best_khps        - kissing hairpins ("khp"), energy values[1].
              Every candidate weight is -1 * round(energy, 2).

    Return:   Structure elements in the MWIS, as the tuple
              (mwis_dic, crossing_structures, secondary_structures):
              mwis_dic             - (start, end, values[0]) of a hairpin ->
                                     MWIS result of its nested elements.
              crossing_structures  - selected "khp"/"pk" elements -> weight.
              secondary_structures - selected "hp"/"ib"/"ml" elements -> weight.
              All three are empty when there are no candidates.
    """
    crossing_structures, secondary_structures, mwis_dic = {}, {}, {}
    candidate_list = []

    # Hairpin loops: only stems with negative free energy are considered
    for stem, values in stem_dic_mwis.items():
        if values[3] < 0.0:
            candidate_list.append((stem[0], stem[1], values[0], values[1],
                                   -1 * round(values[3], 2), "hp"))

    # Pseudoknots carry their remaining key fields behind the type tag
    for pk_stem, pk_energy in pk_recursive_dic.items():
        candidate_list.append((pk_stem[0], pk_stem[1], pk_stem[4], pk_stem[7],
                               -1 * round(pk_energy[0], 2), "pk",
                               pk_stem[2], pk_stem[3], pk_stem[4], pk_stem[5],
                               pk_stem[6], pk_stem[7], pk_stem[8]))

    # Bulge/internal loops and multiloops share the same value layout
    for source, tag in ((bulges_internal, "ib"), (multiloops, "ml")):
        for stem, values in source.items():
            candidate_list.append((stem[0], stem[1], values[0], values[1],
                                   -1 * round(values[2], 2), tag))

    # Kissing hairpins
    for stem, values in best_khps.items():
        candidate_list.append((stem[0], stem[1], values[1], 0.0,
                               -1 * round(values[1], 2), "khp"))

    if not candidate_list:
        return mwis_dic, crossing_structures, secondary_structures

    candidate_list.sort()
    sorted_endpoint_list = functions.create_sorted_endpointlist(candidate_list)

    for endpoint in sorted_endpoint_list:  # Scan sorted endpoints list
        if endpoint[1] != 'r':  # Only right endpoints are of interest
            continue
        position = endpoint[3] - 1
        outer_interval = candidate_list[position]
        if outer_interval[5] != 'hp':
            continue

        # NOTE(review): find_nested is defined elsewhere; the flag it
        # returns is assumed to be a real boolean - confirm.
        nested, only_hp_ib_ml = find_nested(outer_interval, candidate_list)
        if not nested or only_hp_ib_ml:
            continue

        # MWIS on the set of nested structure elements
        endpoint_list_recursive = functions.create_sorted_endpointlist(nested)
        result = functions.MWIS(nested, endpoint_list_recursive)

        # Free energy sum. Accumulated one by one on purpose: the built-in
        # sum() may round floats differently on newer interpreters.
        energy = outer_interval[4]
        for element in result:
            energy = energy + element[4]

        # Store updated free energy for outer stem
        candidate_list[position] = (outer_interval[0], outer_interval[1],
                                    outer_interval[2], outer_interval[3],
                                    energy, outer_interval[5])
        # Store inner structure elements in dictionary
        stem = outer_interval[0], outer_interval[1], outer_interval[2]
        mwis_dic[stem] = result

    # Main MWIS calculation
    sorted_endpoint_list_recursive = functions.create_sorted_endpointlist(candidate_list)
    result = functions.MWIS(candidate_list, sorted_endpoint_list_recursive)

    # Search for detected pseudoknots and kissing hairpins
    for element in result:
        kind = element[5]
        if kind in ('khp', 'pk'):
            crossing_structures[element] = element[4]
        if kind in ('hp', 'ib', 'ml'):
            secondary_structures[element] = element[4]
        if kind == 'hp':
            # Hairpin loop can have nested elements
            crossing_structures, secondary_structures = print_recursive(
                element, mwis_dic, crossing_structures, secondary_structures)

    return mwis_dic, crossing_structures, secondary_structures