def calc_dG_SDopen(self, mRNA_structure, mRNA_rRNA_structure):
    """Calculate the dG required to unfold the nucleotides in the 16S rRNA binding site.

    The mRNA on each side of the rRNA-bound region is folded on its own and
    the sum of those two energies is compared with the energy of the folded
    mRNA.

    Args:
        mRNA_structure: mapping that provides "mRNA", "program",
            "MinStructureID" and "<program>_energy" (indexed by the
            structure ID).
        mRNA_rRNA_structure: mapping that provides "MinStructureID",
            "subopt_basepairing_x" and "subopt_basepairing_y" (both indexed
            by the structure ID).

    Returns:
        (dG_pre + dG_post) - dG_mRNA; positive if work is required to unfold
        the SD sequence.
    """
    mRNA = mRNA_structure["mRNA"]
    program = mRNA_structure["program"]
    dG_mRNA = mRNA_structure[program + "_energy"][mRNA_structure["MinStructureID"]]

    index = mRNA_rRNA_structure["MinStructureID"]
    bp_x = mRNA_rRNA_structure["subopt_basepairing_x"][index]
    bp_y = mRNA_rRNA_structure["subopt_basepairing_y"][index]

    # Find the outermost mRNA nucleotides that are paired with the rRNA.
    # A pair whose y index lies beyond the mRNA length has its partner in the
    # rRNA. The x partner is used directly: looking it up again through
    # bp_y.index(nt_y) gives the same value as long as no index appears twice
    # in bp_y, and costs a linear scan per pair.
    mRNA_len = len(mRNA)
    most_5p_mRNA = self.infinity
    most_3p_mRNA = -self.infinity
    for (nt_x, nt_y) in zip(bp_x, bp_y):
        if nt_y > mRNA_len:
            most_5p_mRNA = min(most_5p_mRNA, nt_x)
            most_3p_mRNA = max(most_3p_mRNA, nt_x)

    # NOTE(review): if no pair reaches into the rRNA, the bounds stay at
    # +/- self.infinity and the slices below are taken with those values.
    # NOTE(review): the test above treats pair indices as 1-based while the
    # slices use them as 0-based offsets -- confirm the intended boundaries.
    pre_mRNA = mRNA[0:most_5p_mRNA]
    post_mRNA = mRNA[most_3p_mRNA + 1:]

    pre_fold = NuPACK([pre_mRNA], material=self.RNA_model)
    pre_fold.mfe([1], dangles=self.dangles, Temp=self.temp)
    dG_pre = pre_fold["mfe_energy"][0]

    post_fold = NuPACK([post_mRNA], material=self.RNA_model)
    post_fold.mfe([1], dangles=self.dangles, Temp=self.temp)
    dG_post = post_fold["mfe_energy"][0]

    # Positive if work is required to unfold the SD sequence.
    return (dG_pre + dG_post) - dG_mRNA
def calc_dG_mRNA(self, start_pos):
    """Fold the mRNA window around start_pos and report its mfe energy.

    The window spans self.cutoff nucleotides on each side of start_pos,
    clipped to the bounds of self.mRNA_input.

    Returns:
        (dG_mRNA_folding, structure) where structure is the NuPACK fold
        object itself, extended with the keys "mRNA", "bp_x", "bp_y",
        "dG_mRNA" and "MinStructureID".
    """
    window_begin = max(0, start_pos - self.cutoff)
    window_end = min(len(self.mRNA_input), start_pos + self.cutoff)
    mRNA = self.mRNA_input[window_begin:window_end]

    fold = NuPACK([mRNA], self.RNA_model)
    fold.mfe([1], Temp=self.temp, dangles=self.dangles)

    mfe_energy = fold["mfe_energy"][0]

    # The fold object doubles as the returned structure record.
    annotations = (
        ("mRNA", mRNA),
        ("bp_x", fold["mfe_basepairing_x"][0]),
        ("bp_y", fold["mfe_basepairing_y"][0]),
        ("dG_mRNA", mfe_energy),
        ("MinStructureID", 0),
    )
    for (key, value) in annotations:
        fold[key] = value

    return (mfe_energy, fold)
def calc_dG_rRNA(self):
    """Return the mfe folding energy of self.rRNA folded on its own.

    The original note describes this as the dG of folding for the last 9 nt
    of the 16S rRNA, and states that it is not used in the free energy model.
    Dangles are fixed to "all" here rather than taken from self.dangles.
    """
    rRNA_fold = NuPACK([self.rRNA], self.RNA_model)
    rRNA_fold.mfe([1], Temp=self.temp, dangles="all")
    return rRNA_fold["mfe_energy"][0]
def calc_dG_standby_site(self, structure_old, rRNA_binding=True):
    """Calculates the dG_standby given the structure of the mRNA:rRNA complex.

    To calculate the mfe structure while disallowing base pairing at the
    standby site, the folded mRNA sequence is split into three parts:
    (i) a pre-sequence (before the standby site) that can fold; (ii) the
    standby site, which can not fold; (iii) the 16S rRNA binding site and
    downstream sequence, which has been previously folded.

    Args:
        structure_old: mapping that provides "mRNA", "bp_x", "bp_y",
            "dG_mRNA_rRNA", "MinStructureID", "subopt_basepairing_x",
            "subopt_basepairing_y" and "subopt_energy". It is deep-copied
            and not modified.
        rRNA_binding: accepted for interface compatibility; not read here.

    Returns:
        (dG_standby_site, structure): the energy difference clamped to be at
        most 0.0, and the updated copy of the structure.

    Raises:
        CalcError: if the structure contains no mRNA:rRNA base pair.
    """
    import copy

    structure = copy.deepcopy(structure_old)
    mRNA = structure["mRNA"]
    bp_x = structure["bp_x"]
    bp_y = structure["bp_y"]
    energy_before = structure["dG_mRNA_rRNA"]  # without spacing effects

    # Identify the most 5p mRNA nt that is bound to rRNA: the first pair
    # whose x index lies within the mRNA and whose y index lies beyond it.
    mRNA_len = len(mRNA)
    most_5p_mRNA = None
    for (nt_x, nt_y) in zip(bp_x, bp_y):
        if nt_x <= mRNA_len and nt_y > mRNA_len:
            most_5p_mRNA = nt_x
            break

    # Without this guard the name would be unbound below and the function
    # would die with an UnboundLocalError.
    if most_5p_mRNA is None:
        raise CalcError("Warning: The mRNA:rRNA structure has no base pair between the mRNA and the 16S rRNA, so the standby site can not be located.")

    # Fold the sequence upstream of the standby site and extract its pairings.
    mRNA_subsequence = mRNA[0:max(0, most_5p_mRNA - self.standby_site_length - 1)]
    if len(mRNA_subsequence) > 0:
        fold = NuPACK([mRNA_subsequence], material=self.RNA_model)
        fold.mfe([1], dangles=self.dangles, Temp=self.temp)
        bp_x_5p = fold["mfe_basepairing_x"][0]
        bp_y_5p = fold["mfe_basepairing_y"][0]
    else:
        bp_x_5p = []
        bp_y_5p = []

    # Put the sets of base pairings together: the refolded 5' pairings first,
    # then the original pairings that are 3' of the most_5p_mRNA pairing.
    pairs_after = list(zip(bp_x_5p, bp_y_5p))
    pairs_after.extend(pair for pair in zip(bp_x, bp_y) if pair[0] >= most_5p_mRNA)
    bp_x_after = [pair[0] for pair in pairs_after]
    bp_y_after = [pair[1] for pair in pairs_after]

    # Calculate the energy of the combined structure on the full complex.
    fold = NuPACK([mRNA, self.rRNA], material=self.RNA_model)
    energy_after = fold.energy([1, 2], bp_x_after, bp_y_after, dangles=self.dangles, Temp=self.temp)

    dG_standby_site = energy_before - energy_after
    if dG_standby_site > 0.0:
        dG_standby_site = 0.0

    index = structure["MinStructureID"]
    structure["bp_x"] = bp_x_after
    structure["bp_y"] = bp_y_after
    structure["subopt_basepairing_x"][index] = bp_x_after
    structure["subopt_basepairing_y"][index] = bp_y_after
    structure["subopt_energy"][index] = energy_after
    structure["dG_mRNA_rRNA_corrected"] = energy_after

    return (dG_standby_site, structure)
def calc_dG_mRNA_rRNA(self, start_pos):
    """Calculates the dG_mRNA_rRNA from the mRNA and rRNA sequence.

    Considers all feasible 16S rRNA binding sites and includes the effects of
    non-optimal spacing.

    Args:
        start_pos: position of the start codon in self.mRNA_input.

    Returns:
        (total_energy_withspacing, structure) where structure is the NuPACK
        object for the mRNA:rRNA complex, extended with the keys "program",
        "mRNA", "MinStructureID", "dG_mRNA_rRNA", "dG_mRNA_rRNA_withspacing",
        "dG_spacing", "subopt_energy", "subopt_basepairing_x",
        "subopt_basepairing_y", "subopt_composition", "bp_x" and "bp_y".

    Raises:
        CalcError: if there is no sequence upstream of the start codon, or
            if subopt returns no binding site.
    """
    begin = max(0, start_pos - self.cutoff)
    mRNA_len = min(len(self.mRNA_input), start_pos + self.cutoff)
    start_pos_in_subsequence = min(start_pos, self.cutoff)
    startpos_to_end_len = mRNA_len - start_pos_in_subsequence - begin

    def fold_alone(sequence):
        """Return (bp_x, bp_y) of the mfe fold of one strand; empty lists if it is empty."""
        if len(sequence) == 0:
            return ([], [])
        single = NuPACK([sequence], material=self.RNA_model)
        single.mfe([1], dangles=self.dangles, Temp=self.temp)
        return (single["mfe_basepairing_x"][0], single["mfe_basepairing_y"][0])

    # 1. Identify a list of rRNA-binding sites. Binding sites are
    # hybridizations between the mRNA and rRNA and can include mismatches,
    # bulges, etc. Intra-molecular folding is also allowed within the mRNA.
    # The subopt program generates the optimal & suboptimal binding sites.
    # Constraint: the entire rRNA-binding site must be upstream of the start
    # codon.
    mRNA = self.mRNA_input[begin:start_pos]
    if len(mRNA) == 0:
        raise CalcError("Warning: There is a leaderless start codon, which is being ignored.")

    fold = NuPACK([mRNA, self.rRNA], material=self.RNA_model)
    fold.subopt([1, 2], self.energy_cutoff, dangles=self.dangles, Temp=self.temp)

    if len(fold["subopt_basepairing_x"]) == 0:
        raise CalcError("Warning: The 16S rRNA has no predicted binding site. Start codon is considered as leaderless and ignored.")

    # 2. Calculate dG_spacing for each binding site from its aligned spacing
    # and add it to that site's subopt energy.
    aligned_spacing = [
        self.calc_aligned_spacing(mRNA, start_pos_in_subsequence, bp_x, bp_y)
        for (bp_x, bp_y) in zip(fold["subopt_basepairing_x"], fold["subopt_basepairing_y"])
    ]
    dG_spacing_list = [self.calc_dG_spacing(spacing) for spacing in aligned_spacing]
    dG_mRNA_rRNA_withspacing = [
        dG_spacing + dG_site
        for (dG_spacing, dG_site) in zip(dG_spacing_list, fold["subopt_energy"])
    ]

    # 3. Find the binding site that minimizes dG_spacing + dG_mRNA_rRNA.
    (_min_energy, index) = self.find_min(dG_mRNA_rRNA_withspacing)
    dG_spacing_final = dG_spacing_list[index]

    # Check: is the dG spacing large compared to the energy gap? If so, the
    # list of suboptimal binding sites generated by subopt is too short.
    if dG_spacing_final > self.energy_cutoff and self.verbose:
        # Written so that Python 2 and Python 3 emit the same text the
        # original print statement produced (message, one space, value).
        print("Warning: The spacing penalty is greater than the energy gap. dG (spacing) = " + " " + str(dG_spacing_final))

    # 4. Identify the 5' and 3' ends of the chosen binding site and collect
    # its rRNA-mRNA base pairs. A pair whose y index lies beyond the upstream
    # mRNA has its partner in the rRNA. The x partner is used directly
    # instead of re-deriving it through bp_y.index(nt_y), which is equivalent
    # as long as no index appears twice in bp_y.
    upstream_len = len(mRNA)
    most_5p_mRNA = self.infinity
    most_3p_mRNA = -self.infinity
    bp_x_target = []
    bp_y_target = []
    for (nt_x, nt_y) in zip(fold["subopt_basepairing_x"][index], fold["subopt_basepairing_y"][index]):
        if nt_y > upstream_len:
            most_5p_mRNA = min(most_5p_mRNA, nt_x)
            most_3p_mRNA = max(most_3p_mRNA, nt_x)
            bp_x_target.append(nt_x)
            bp_y_target.append(nt_y)

    # The rRNA-binding site is between positions most_5p_mRNA and
    # most_3p_mRNA. The pre-sequence and post-sequence are folded separately
    # and their base pairings are combined with the binding-site pairings.
    # The post-sequence starts self.footprint nucleotides after the start
    # codon: the sequence covered by the 30S complex footprint is not
    # allowed to form secondary structures.
    # NOTE(review): if no pair reaches into the rRNA, most_5p_mRNA is still
    # self.infinity when it is used in the slice below.
    mRNA_pre = self.mRNA_input[begin:begin + most_5p_mRNA - 1]
    post_window_end = mRNA_len + 1
    post_window_begin = min(start_pos + self.footprint, post_window_end)
    mRNA_post = self.mRNA_input[post_window_begin:post_window_end]

    total_bp_x = []
    total_bp_y = []

    # Pre-sequence pairings already use the coordinates of the full window.
    (bp_x_pre, bp_y_pre) = fold_alone(mRNA_pre)
    for (nt_x, nt_y) in zip(bp_x_pre, bp_y_pre):
        total_bp_x.append(nt_x)
        total_bp_y.append(nt_y)

    # Binding-site pairings: the rRNA index is shifted by the length of the
    # sequence from the start codon to the end of the window, because the
    # rRNA now follows the whole window instead of the upstream part only.
    rRNA_offset = startpos_to_end_len
    for (nt_x, nt_y) in zip(bp_x_target, bp_y_target):
        total_bp_x.append(nt_x)
        total_bp_y.append(nt_y + rRNA_offset)

    # Post-sequence pairings are shifted to where the post window begins.
    (bp_x_post, bp_y_post) = fold_alone(mRNA_post)
    post_offset = post_window_begin - begin
    for (nt_x, nt_y) in zip(bp_x_post, bp_y_post):
        total_bp_x.append(nt_x + post_offset)
        total_bp_y.append(nt_y + post_offset)

    # Energy of the combined structure on the full window plus the rRNA.
    mRNA = self.mRNA_input[begin:mRNA_len]
    fold = NuPACK([mRNA, self.rRNA], material=self.RNA_model)
    total_energy = fold.energy([1, 2], total_bp_x, total_bp_y, Temp=self.temp, dangles=self.dangles)
    total_energy_withspacing = total_energy + dG_spacing_final

    # The NuPACK object doubles as the returned structure record.
    structure = fold
    structure["program"] = "subopt"
    structure["mRNA"] = mRNA
    structure["MinStructureID"] = 0
    structure["dG_mRNA_rRNA"] = total_energy
    structure["dG_mRNA_rRNA_withspacing"] = total_energy_withspacing
    structure["dG_spacing"] = dG_spacing_final
    structure["subopt_energy"] = [total_energy_withspacing]
    structure["subopt_basepairing_x"] = [total_bp_x]
    structure["subopt_basepairing_y"] = [total_bp_y]
    structure["subopt_composition"] = [1, 2]
    structure["bp_x"] = total_bp_x
    structure["bp_y"] = total_bp_y

    return (total_energy_withspacing, structure)
def __init__(self, OBJECT, MODE='TOTAL'):
    """Run two NuPACK equilibrium calculations for the strands of OBJECT.

    The strands are selected through OBJECT.__GUIDE__, a list of attribute
    names on OBJECT:
      * MODE 'TOTAL' uses every entry but the last;
      * MODE 'FUEL' uses the first two entries plus the last, reversed.

    Two NuPACK objects are built from the same sequences. They differ only in
    the concentration given to the first strand (1e-6 versus 1e-9). Each is
    passed through self.eqcon, and the results are stored on self.eq1,
    self.eq2 and self.__GUIDES__.

    Args:
        OBJECT: a Circuit instance; its name is copied to self.name.
        MODE: 'TOTAL' (default) or 'FUEL'.

    Raises:
        ValueError: if OBJECT is not a Circuit or MODE is not supported.
    """
    self.name = OBJECT.name
    concent = [1e-6, 1e-6]
    if __name__ == '__main__':
        print('Calculating test-tube NuPACK simulation')

    if isinstance(OBJECT, Circuit) and MODE == 'TOTAL':
        guide = OBJECT.__GUIDE__[:-1]
        # 'TOTAL' selects more strands, so two more concentrations are needed.
        concent += [1e-6, 1e-6]
    elif isinstance(OBJECT, Circuit) and MODE == 'FUEL':
        guide = (OBJECT.__GUIDE__[0:2] + [OBJECT.__GUIDE__[-1]])[::-1]
    else:
        # Without this branch `guide` would be unbound below and the
        # constructor would die with an UnboundLocalError.
        raise ValueError(
            "Unsupported OBJECT/MODE combination: expected a Circuit with "
            "MODE 'TOTAL' or 'FUEL', got %s with MODE %r"
            % (type(OBJECT).__name__, MODE)
        )

    seq_list = [getattr(OBJECT, el) for el in guide]

    eq1 = NuPACK(Sequence_List=seq_list, material='dna')
    eq2 = NuPACK(Sequence_List=seq_list, material='dna')
    eq1.complexes(dangles='none', MaxStrands=2, quiet=True)
    eq2.complexes(dangles='none', MaxStrands=2, quiet=True)

    # Same system twice; only the first strand's concentration differs.
    eq1.concentrations(concentrations=[1e-6] + concent, quiet=True)
    eq2.concentrations(concentrations=[1e-9] + concent, quiet=True)

    (g1, eq1) = self.eqcon(eq1, guide)
    (g2, eq2) = self.eqcon(eq2, guide)
    self.eq1 = eq1
    self.eq2 = eq2
    self.__GUIDES__ = [g1, g2]