def first_step_simulation(strand_seq, trials, T=25, material="DNA"):
    """Run first step mode simulations of a strand hybridizing with ``top.C``.

    Both single strands start from Boltzmann-sampled structures.  A trajectory
    is tagged "SUCCESS" when the exact full duplex forms and "FAILURE" when the
    strands dissociate again; a trajectory with no tag is counted with the
    failures.

    Args:
        strand_seq: Sequence of the "top" strand.
        trials: Number of trajectories; also used as the Boltzmann sample count.
        T: Temperature handed to ``Options``.
        material: Substrate type handed to ``Options``.

    Returns:
        Tuple ``(k1, k2, k1prime, k2prime)``.  ``k1``/``k1prime`` are the
        second-order rate constants of productive/unproductive collisions and
        ``k2``/``k2prime`` are the first-order rate constants for how long the
        productive/unproductive reactions last.  ``k2`` (or ``k2prime``) is
        NaN when no trajectory falls into the corresponding class, because the
        mean of an empty array is NaN.
    """
    print("Running %d first step mode simulations for %s (with Boltzmann sampling)..."
          % (trials, strand_seq))

    # Using domain representation makes it easier to write secondary structures.
    onedomain = Domain(name="itall", sequence=strand_seq)
    top = Strand(name="top", domains=[onedomain])
    bot = top.C

    # The structure is specified to be single stranded, but this is
    # over-ridden when Boltzmann sampling is turned on.
    start_complex_top = Complex(strands=[top], structure=".")
    start_complex_bot = Complex(strands=[bot], structure=".")
    for start_complex in (start_complex_top, start_complex_bot):
        # Sampling 'trials' states in one go is the efficient way to sample.
        start_complex.boltzmann_count = trials
        start_complex.boltzmann_sample = True

    # Stop when the exact full duplex is achieved. (No breathing!)
    success_complex = Complex(strands=[top, bot], structure="(+)")
    success_stop_condition = StopCondition(
        "SUCCESS", [(success_complex, Exact_Macrostate, 0)])

    # Declare the simulation unproductive if the strands become
    # single-stranded again.
    failed_complex = Complex(strands=[top], structure=".")
    failed_stop_condition = StopCondition(
        "FAILURE", [(failed_complex, Dissoc_Macrostate, 0)])

    o = Options(simulation_mode="First Step", parameter_type="Nupack",
                substrate_type=material, rate_method="Metropolis",
                num_simulations=trials, simulation_time=1.0,
                dangles="Some", temperature=T, rate_scaling="Calibrated",
                verbosity=0)
    o.start_state = [start_complex_top, start_complex_bot]
    o.stop_conditions = [success_stop_condition, failed_stop_condition]

    # Now go ahead and run the simulations.
    initialize_energy_model(o)  # concentration changes, so we must make sure energies are right
    s = SimSystem(o)
    s.start()
    dataset = o.interface.results

    # Now determine the reaction model parameters from the simulation results.
    # (Simplified from hybridization_first_step_mode.py.)
    collision_rates = np.array([i.collision_rate for i in dataset])
    was_success = np.array([1 if i.tag == "SUCCESS" else 0 for i in dataset])
    was_failure = 1 - was_success  # everything that is not a success
    forward_times = np.array([i.time for i in dataset if i.tag == "SUCCESS"])
    # Untagged trajectories are lumped in with the explicit failures.
    reverse_times = np.array(
        [i.time for i in dataset if i.tag == "FAILURE" or i.tag is None])

    # First-order rate constants for the duration of the reactions
    # (both productive and unproductive).
    k2 = 1.0 / np.mean(forward_times)
    k2prime = 1.0 / np.mean(reverse_times)

    # Second-order rate constants for productive and unproductive reactions.
    k1 = np.mean(collision_rates * was_success)
    k1prime = np.mean(collision_rates * was_failure)

    return k1, k2, k1prime, k2prime
def first_step_simulation(strand_seq, num_traj, T=25, rate_method_k_or_m="Metropolis", concentration=50e-9, material="DNA"):
    """Run first step mode simulations and hand back the options object.

    The options are built by ``create_setup`` from ``strand_seq``,
    ``num_traj``, ``T``, ``rate_method_k_or_m`` and ``material``.
    ``concentration`` is accepted but not used by this function.

    Returns:
        The options object that ``create_setup`` produced, after the
        simulation system built from it has been started.
    """
    # Run the simulations
    banner = "Running %d first step mode simulations for %s (with Boltzmann sampling)..."
    print(banner % (num_traj, strand_seq))

    options = create_setup(strand_seq, num_traj, T, rate_method_k_or_m, material)

    # Prior simulations could have been for a different temperature,
    # material, etc., but Multistrand "optimizes" by sharing the energy model
    # parameters from sim to sim.  So if parameters changed within the same
    # python session, the energy model must be re-initialized.
    initialize_energy_model(options)

    simulation = SimSystem(options)
    simulation.start()
    return options
def first_passage_dissociation(strand_seq, trials, T=25, material="DNA"):
    """Estimate a duplex dissociation rate from first passage time runs.

    Every trajectory starts in the full duplex of the strand and ``top.C`` and
    is tagged "SUCCESS" once the strands have become single-stranded again.

    Args:
        strand_seq: Sequence of the "top" strand.
        trials: Number of trajectories to simulate.
        T: Temperature handed to ``Options``.
        material: Substrate type handed to ``Options``.

    Returns:
        ``krev``, the reciprocal of the mean trajectory time.  The mean is
        taken over all trajectories, including any that timed out.
    """
    # Single source of truth for the per-trajectory time limit: it is used by
    # the Options below, by the warning text and by the sanity checks.
    max_time = 10.0

    print("Running %d first passage time simulations for dissociation of %s..."
          % (trials, strand_seq))

    # Using domain representation makes it easier to write secondary structures.
    onedomain = Domain(name="itall", sequence=strand_seq)
    top = Strand(name="top", domains=[onedomain])
    bot = top.C
    single_strand_top = Complex(strands=[top], structure=".")
    duplex_complex = Complex(strands=[top, bot], structure="(+)")

    # Declare the simulation complete if the strands become single-stranded
    # again.
    success_stop_condition = StopCondition(
        "SUCCESS", [(single_strand_top, Dissoc_Macrostate, 0)])

    o = Options(
        simulation_mode="First Passage Time",
        parameter_type="Nupack",
        substrate_type=material,
        rate_method="Metropolis",
        num_simulations=trials,
        simulation_time=max_time,
        join_concentration=1e-6,  # 1 uM concentration, but doesn't matter for dissociation
        dangles="Some",
        temperature=T,
        rate_scaling="Calibrated",
        verbosity=0)
    o.start_state = [duplex_complex]
    o.stop_conditions = [success_stop_condition]

    # Now go ahead and run the simulations.
    initialize_energy_model(o)  # concentration changes, so we must make sure energies are right
    s = SimSystem(o)
    s.start()
    dataset = o.interface.results

    times = np.array([i.time for i in dataset])
    timeouts = [i for i in dataset if i.tag != 'SUCCESS']
    if timeouts:
        print("Warning: %d of %d dissociation trajectories did not finish in allotted %g seconds..."
              % (len(timeouts), len(times), max_time))
        # Anything that did not succeed must be a genuine time-out.
        for i in timeouts:
            assert i.tag == Literals.time_out
            assert i.time >= max_time

    krev = 1.0 / np.mean(times)
    return krev
def first_passage_association(strand_seq, trials, concentration, T=25, material="DNA"):
    """Estimate an effective association rate from first passage time runs.

    Every trajectory starts from Boltzmann-sampled single strands (the strand
    and ``top.C``) and is tagged "SUCCESS" once the exact full duplex forms.

    Args:
        strand_seq: Sequence of the "top" strand.
        trials: Number of trajectories; also used as the Boltzmann sample count.
        concentration: Passed to ``Options`` as ``join_concentration`` and used
            to normalize the returned rate.
        T: Temperature handed to ``Options``.
        material: Substrate type handed to ``Options``.

    Returns:
        ``keff = 1 / mean(trajectory time) / concentration``.
    """
    # Single source of truth for the per-trajectory time limit: it is used by
    # the Options below and by the time-out sanity check.
    max_time = 10.0

    print("Running %d first passage time simulations for association of %s at %s..."
          % (trials, strand_seq, concentration_string(concentration)))

    # Using domain representation makes it easier to write secondary structures.
    onedomain = Domain(name="itall", sequence=strand_seq)
    top = Strand(name="top", domains=[onedomain])
    bot = top.C
    duplex_complex = Complex(strands=[top, bot], structure="(+)")
    single_strand_top = Complex(strands=[top], structure=".")
    single_strand_bot = Complex(strands=[bot], structure=".")

    # Start with Boltzmann-sampled single-strands... it only seems fair.
    for single_strand in (single_strand_top, single_strand_bot):
        single_strand.boltzmann_count = trials
        single_strand.boltzmann_sample = True

    # Declare the simulation complete if the strands become a perfect duplex.
    success_stop_condition = StopCondition(
        "SUCCESS", [(duplex_complex, Exact_Macrostate, 0)])

    o = Options(simulation_mode="First Passage Time", parameter_type="Nupack",
                substrate_type=material, rate_method="Metropolis",
                num_simulations=trials, simulation_time=max_time,
                join_concentration=concentration, dangles="Some",
                temperature=T, rate_scaling="Calibrated", verbosity=0)
    o.start_state = [single_strand_top, single_strand_bot]
    o.stop_conditions = [success_stop_condition]

    # Now go ahead and run the simulations.
    initialize_energy_model(o)  # concentration changes, so we must make sure energies are right
    s = SimSystem(o)
    s.start()
    dataset = o.interface.results

    times = np.array([i.time for i in dataset])
    timeouts = [i for i in dataset if i.tag != 'SUCCESS']
    if timeouts:
        print("some association trajectories did not finish...")
    # Anything that did not succeed must be an untagged time-out.
    for i in timeouts:
        assert i.type_name == 'Time'
        assert i.tag is None
        assert i.time >= max_time

    # NOTE(review): 'times' also contains the timed-out trajectories, so the
    # mean presumably underestimates the completion time whenever the warning
    # above fires -- confirm this is intended.
    print("average completion time = %g seconds at %s"
          % (np.mean(times), concentration_string(concentration)))

    keff = 1.0 / np.mean(times) / concentration
    return keff
def transition_mode_simulation(strand_seq, duration, concentration, T=25, material="DNA"):
    """Run one transition mode trajectory and derive rates from its transitions.

    Two macrostates are tracked: "SINGLE" (strands dissociated) and "DUPLEX"
    (loose match to the full duplex).  The trajectory starts from the two
    single strands and runs until it times out after ``duration`` seconds.

    Args:
        strand_seq: Sequence of the "top" strand; the other strand is ``top.C``.
        duration: Simulated time; converted to ``float`` for ``Options``.
        concentration: Passed to ``Options`` as ``join_concentration`` and used
            to normalize ``keff``.
        T: Temperature handed to ``Options``.
        material: Substrate type handed to ``Options``.

    Returns:
        Tuple ``(keff, krev)``.  ``keff`` is ``None`` when no 'A -> B'
        transition was recorded and ``krev`` is ``None`` when no 'B -> A'
        transition was recorded.
    """
    print("Running %g seconds of transition mode simulations of %s at %s..."
          % (duration, strand_seq, concentration_string(concentration)))

    # Using domain representation makes it easier to write secondary structures.
    onedomain = Domain(name="itall", sequence=strand_seq)
    top = Strand(name="top", domains=[onedomain])
    bot = top.C
    duplex_complex = Complex(strands=[top, bot], structure="(+)")
    single_strand_top = Complex(strands=[top], structure=".")
    single_strand_bot = Complex(strands=[bot], structure=".")

    # Declare macrostates
    single_stranded_macrostate = Macrostate(
        "SINGLE", [(single_strand_top, Dissoc_Macrostate, 0)])
    duplex_macrostate = Macrostate(
        "DUPLEX", [(duplex_complex, Loose_Macrostate, 4)])

    o = Options(simulation_mode="Transition", parameter_type="Nupack",
                substrate_type=material, rate_method="Metropolis",
                num_simulations=1,
                simulation_time=float(duration),  # time must be passed as float, not int
                join_concentration=concentration, dangles="Some",
                temperature=T, rate_scaling="Calibrated", verbosity=0)
    o.start_state = [single_strand_top, single_strand_bot]
    # Not actually stopping, just tracking.
    o.stop_conditions = [single_stranded_macrostate, duplex_macrostate]

    # Now go ahead and run the simulations until time-out.
    initialize_energy_model(o)  # concentration changes, so we must make sure energies are right
    sim = SimSystem(o)
    sim.start()

    # Now make sense of the results.
    transition_dict = parse_transition_lists(o.interface.transition_lists)
    print_transition_dict(transition_dict, o)

    def tally(label):
        # Count of recorded entries for 'label' and their mean.  When nothing
        # was recorded the mean is a placeholder 1; it only ever gets
        # multiplied by the zero count.
        if label in transition_dict:
            count = float(len(transition_dict[label]))
        else:
            count = 0
        if count > 0:
            mean_value = np.mean(transition_dict[label])
        else:
            mean_value = 1
        return count, mean_value

    # A is SINGLE, B is DUPLEX
    N_AtoA, dT_AtoA = tally('A -> A')
    N_AtoB, dT_AtoB = tally('A -> B')
    N_BtoB, dT_BtoB = tally('B -> B')
    N_BtoA, dT_BtoA = tally('B -> A')

    if N_AtoB > 0:
        keff = 1.0 / (dT_AtoB + (N_AtoA / N_AtoB) * dT_AtoA) / concentration
    else:
        keff = None

    if N_BtoA > 0:
        krev = 1.0 / (dT_BtoA + (N_BtoB / N_BtoA) * dT_BtoB)
    else:
        krev = None

    return keff, krev
def first_step_simulation(strand_seq, num_traj, T=25, rate_method_k_or_m="Metropolis", concentration=50e-9, material="DNA"):
    """Run first step mode simulations and report bootstrapped rate constants.

    The options are built by ``create_setup``; the trajectories are then fed
    to ``compute_rate_constants`` once on the full data set and 1000 more
    times on data sets resampled with replacement.  The standard deviation of
    each quantity over the resampled sets is printed as its error bar.

    Returns:
        ``[N_forward, N_reverse, k1, k1prime, k2, k2prime, keff, zcrit, o]``
        where the estimates come from the full data set and ``o`` is the
        options object.
    """
    # Run the simulations
    print("Running first step mode simulations for %s (with Boltzmann sampling)..." % strand_seq)

    o = create_setup(strand_seq, num_traj, T, rate_method_k_or_m, material)

    # Prior simulations could have been for a different temperature,
    # material, etc., but Multistrand "optimizes" by sharing the energy model
    # parameters from sim to sim.  So if parameters changed within the same
    # python session, the energy model must be re-initialized.
    initialize_energy_model(o)

    sim = SimSystem(o)
    sim.start()
    dataset = o.interface.results

    # You might be interested in examining the data manually when num_traj < 10:
    # loop over 'dataset' and print each entry and its type_name.

    # Extract the timing information for successful and failed runs
    print("")
    print("Inferred rate constants with analytical error bars:")
    (N_forward, N_reverse, kcoll, forward_kcoll, reverse_kcoll,
     k1, k2, k1prime, k2prime, keff, zcrit) = compute_rate_constants(dataset, concentration)

    # Bootstrapping is a technique that estimates statistical properties by
    # assuming that the given samples adequately represent the true
    # distribution, and then resampling from that distribution to create as
    # many mock data sets as you want.  The variation of statistical
    # quantities in the mock data sets is often a good estimate of the true
    # values.  We rely on bootstrapping to get error bars for k_eff, and to
    # validate our estimated error bars for k2 and k2prime.
    fields = ("N_forward", "N_reverse", "kcoll", "forward_kcoll", "reverse_kcoll",
              "k1", "k2", "k1prime", "k2prime", "keff", "zcrit")
    # kcoll is returned by compute_rate_constants but no error bar is derived for it.
    tracked = [name for name in fields if name != "kcoll"]
    history = dict((name, []) for name in tracked)

    last = None
    for _ in range(1000):
        mock_dataset = resample_with_replacement(dataset, len(dataset))
        last = dict(zip(fields, compute_rate_constants(mock_dataset, concentration, printit=False)))
        for name in tracked:
            history[name].append(last[name])

    spread = dict((name, np.std(history[name])) for name in tracked)

    # NOTE(review): the values printed below are those of the final resampled
    # data set (not a bootstrap mean); only the +/- terms summarize all 1000
    # resamples, and the percentages are relative to the full-data estimates.
    print("")
    print("Re-sampled rate constants with bootstrapped error bars:")
    print("N_forward = %d +/- %g" % (last["N_forward"], spread["N_forward"]))
    print("N_reverse = %d +/- %g" % (last["N_reverse"], spread["N_reverse"]))
    print("k_collision_forward = %g +/- %g /M/s (i.e. +/- %g %%)"
          % (last["forward_kcoll"], spread["forward_kcoll"],
             100 * spread["forward_kcoll"] / forward_kcoll))
    print("k_collision_reverse = %g +/- %g /M/s (i.e. +/- %g %%)"
          % (last["reverse_kcoll"], spread["reverse_kcoll"],
             100 * spread["reverse_kcoll"] / reverse_kcoll))
    print("k1 = %g +/- %g /M/s (i.e. +/- %g %%)"
          % (last["k1"], spread["k1"], 100 * spread["k1"] / k1))
    print("k2 = %g +/- %g /s (i.e. +/- %g %%)"
          % (last["k2"], spread["k2"], 100 * spread["k2"] / k2))
    print("k1prime = %g +/- %g /M/s (i.e. +/- %g %%)"
          % (last["k1prime"], spread["k1prime"], 100 * spread["k1prime"] / k1prime))
    print("k2prime = %g +/- %g /s (i.e. +/- %g %%)"
          % (last["k2prime"], spread["k2prime"], 100 * spread["k2prime"] / k2prime))
    print("k_eff = %g +/- %g /M/s (i.e. +/- %g %%) at %s"
          % (last["keff"], spread["keff"], 100 * spread["keff"] / keff,
             concentration_string(concentration)))
    print("z_crit = %s +/- %s (i.e. +/- %g %%)"
          % (concentration_string(last["zcrit"]), concentration_string(spread["zcrit"]),
             100 * spread["zcrit"] / zcrit))
    print("")

    return [N_forward, N_reverse, k1, k1prime, k2, k2prime, keff, zcrit, o]
import multistrand_setup

try:
    from multistrand.objects import *
    from multistrand.options import Options
    from multistrand.system import energy, initialize_energy_model
except ImportError:
    # Report the problem, then let the original ImportError propagate.
    print("Could not import Multistrand.")
    raise

#############

# Prepare the options for simulation.
o = Options(temperature=25, dangles="Some")

# Necessary if you want to use energy() without running a simulation first.
# See more about the energy model usage and initialization in
# threewaybm_trajectories.py
initialize_energy_model(o)

# More meaningful names for argument values to the energy() function call, below.
#   Loop_Energy    (0): no dG_assoc or dG_volume terms are added, so only loop
#                       energies remain.
#   Volume_Energy  (1): dG_volume but not dG_assoc terms are added.  No clear
#                       interpretation for this.
#   Complex_Energy (2): dG_assoc but not dG_volume terms are added.  This is
#                       the NUPACK complex microstate energy, sans symmetry
#                       terms.
#   Tube_Energy    (3): both dG_assoc and dG_volume terms are added.  Summed
#                       over complexes, this is the system state energy.
Loop_Energy, Volume_Energy, Complex_Energy, Tube_Energy = 0, 1, 2, 3

# Sequence is from Schaeffer's PhD thesis, chapter 7, figure 7.1
# Just for illustration, create a hairpin strand with just the outermost
# 4 base pairs of the stem formed:
c = Complex(
    strands=[Strand(name="hairpin", sequence="GTTCGGGCAAAAGCCCGAAC")],
    structure='((((' + 12 * '.' + '))))',
)
energy([c], o, Complex_Energy)  # should be -1.1449...