def extract_data(self, min_time=None, max_time=None, max_step=None):
    """
    Extract the data from simulations used in this analysis.

    Parameters:
    -----------
    min_time : float or None, default None
        The minimum simulation time to consider when reading the
        output files.
    max_time : float or None, default None
        The maximum simulation time to consider. Overridden when
        max_step is provided.
    max_step : int or None, default None
        If provided, max_time is derived from this step count
        (multiplied by the timestep for MMVT models, used directly
        otherwise).
    """
    # If possible, avoid expensive I/O: statistics read by a previous
    # call are reused instead of re-parsing the output files.
    files_already_read = len(self.anchor_stats_list) > 0
    timestep = self.model.get_timestep()
    for alpha, anchor in enumerate(self.model.anchors):
        # Bulk-state anchors contribute no MD statistics.
        if anchor.bulkstate:
            continue
        if max_step is not None:
            if self.model.get_type() == "mmvt":
                # MMVT outputs record times, so convert steps to time.
                max_time = max_step * timestep
            else:
                max_time = max_step
        # These contain only alias_id keys, not the true id values
        if not files_already_read:
            if self.model.get_type() == "mmvt":
                anchor_stats = mmvt_analyze.MMVT_anchor_statistics(alpha)
            elif self.model.get_type() == "elber":
                anchor_stats = elber_analyze.Elber_anchor_statistics(alpha)
            else:
                # Fail loudly instead of leaving anchor_stats unbound
                # for an unrecognized calculation type.
                raise Exception("Unsupported model type: {}".format(
                    self.model.get_type()))
        else:
            anchor_stats = self.anchor_stats_list[alpha]
        if anchor.md:
            output_file_glob = os.path.join(
                self.model.anchor_rootdir, anchor.directory,
                anchor.production_directory, anchor.md_output_glob)
            output_file_list = glob.glob(output_file_glob)
            output_file_list = base.order_files_numerically(
                output_file_list)
            if self.model.openmm_settings is not None:
                anchor_stats.read_output_file_list(
                    "openmm", output_file_list, min_time, max_time,
                    anchor, timestep)
            elif self.model.namd_settings is not None:
                anchor_stats.read_output_file_list(
                    "namd", output_file_list, min_time, max_time,
                    anchor, timestep)
            else:
                raise Exception("Both OpenMM and NAMD settings missing. "\
                                "One of these must be present in the "\
                                "model XML.")
        if not files_already_read:
            self.anchor_stats_list.append(anchor_stats)
    return
def analyze_bd_only(model, data_sample):
    """
    If there are missing MD statistics, then perhaps only a BD
    analysis should be performed. This function only performs a BD
    analysis on a particular data sample.

    Parameters:
    -----------
    model : Model()
        The model whose k_on_info settings determine whether a BD
        analysis exists; if model.k_on_info is None this function
        does nothing.
    data_sample : Data_sample()
        The data sample whose bd_transition_counts attribute will be
        filled out.
    """
    if model.k_on_info is None:
        # No BD calculation was set up for this model.
        return
    # get_bd_transition_counts() globs and orders the BD output files
    # itself, so the duplicate glob that used to live here was dead
    # code and has been removed.
    data_sample.bd_transition_counts = get_bd_transition_counts(model)
    return
def test_order_files_numerically():
    """
    Verify that base.order_files_numerically() sorts file paths by
    their embedded integers (numerically, not lexicographically --
    note "output10_0" must sort after "output2_0").
    """
    desired_list = [
        "/path/to/anchor0/output0_0",
        "/path/to/anchor0/output0_1",
        "/path/to/anchor0/output0_2",
        "/path/to/anchor0/output1_0",
        "/path/to/anchor0/output1_1",
        "/path/to/anchor0/output1_2",
        "/path/to/anchor1/output0_0",
        "/path/to/anchor1/output0_1",
        "/path/to/anchor1/output2_0",
        "/path/to/anchor1/output10_0"]
    string_list = desired_list[:]
    random.shuffle(string_list)
    ordered_list = base.order_files_numerically(string_list)
    # Compare whole lists: the previous zip()-based element-by-element
    # comparison silently truncated, so a result shorter than expected
    # would have passed.
    assert ordered_list == desired_list
    return
def get_bd_transition_counts(model):
    """
    Obtain how many transitions have occurred in the BD stage.

    Returns a dict mapping "b_surface" to the transition counts parsed
    from the b-surface BD output files, or an empty dict when no BD
    output files exist yet.
    """
    assert model.using_bd(), "No valid BD program settings provided."
    b_surface_glob = os.path.join(
        model.anchor_rootdir, model.k_on_info.b_surface_directory,
        model.k_on_info.bd_output_glob)
    bd_output_files = base.order_files_numerically(
        glob.glob(b_surface_glob))
    rate_program = os.path.join(
        model.browndye_settings.browndye_bin_dir, "compute_rate_constant")
    counts_by_region = {}
    if bd_output_files:
        # Returns (k_ons, k_on_errors, reaction_probabilities,
        #          reaction_probability_errors, transition_counts);
        # only the transition counts are needed here.
        results = common_analyze.browndye_run_compute_rate_constant(
            rate_program, bd_output_files,
            sample_error_from_normal=False)
        counts_by_region["b_surface"] = results[4]
    return counts_by_region
def calculate_kinetics(self, pre_equilibrium_approx=False, 
                       bd_sample_from_normal=False):
    """
    Once the rate matrix Q is computed, determine the timescales and
    probabilities of transfers between different states. Fill out all
    kinetics quantities (self.MFPTs, self.k_off, self.k_ons, 
    self.Q_hat, self.K_hat).

    Parameters:
    -----------
    pre_equilibrium_approx : bool, default False
        Whether to use the pre-equilibrium approximation for
        computing kinetics.
    bd_sample_from_normal : bool, default False
        If set to True, then k-on quantities will have a random
        fluctuation introduced in a magnitude proportional to k-on
        errors. This is used only for error estimations.
    """
    end_milestones = []
    bulk_milestones = []
    # MFPTs is keyed by (source_milestone, destination) tuples, where
    # destination is either a milestone id or the string "bulk".
    MFPTs = {}
    k_off = 0.0
    k_ons = {}
    # Collect milestone ids belonging to end-state and bulk-state
    # anchors.
    for alpha, anchor in enumerate(self.model.anchors):
        if anchor.endstate:
            for milestone_id in anchor.get_ids():
                if self.model.get_type() == "elber":
                    if anchor.alias_from_id(milestone_id) == 3:
                        # TODO: hacky
                        continue
                end_milestones.append(milestone_id)
        if anchor.bulkstate:
            for milestone_id in anchor.get_ids():
                bulk_milestones.append(milestone_id)
    # first, make the bulk state the sink state to compute k_offs
    # NOTE(review): basic slices of numpy arrays are *views*, not
    # copies, so Q_hat/p_i_hat initially alias self.Q/self.p_i --
    # confirm that the minor2d()/minor1d()/astype() calls below are
    # relied upon to produce fresh arrays before any in-place writes.
    Q_hat = self.Q[:, :]
    p_i_hat = self.p_i[:]
    #if self.model.k_on_info:
    #    K_hat = self.K[:,:]
    n = len(self.Q)
    # Remove bulk milestones highest-index first so earlier removals
    # do not shift the indices of later ones.
    for bulk_milestone in sorted(bulk_milestones, reverse=True):
        Q_hat = minor2d(Q_hat, bulk_milestone, bulk_milestone)
        p_i_hat = minor1d(p_i_hat, bulk_milestone)
    # Extended precision for the linear solve below.
    Q_hat = Q_hat.astype(dtype=np.longdouble)
    if pre_equilibrium_approx:
        # Pre-equilibrium approximation: the lowest-probability
        # milestone is taken as the rate-limiting step.
        lowest_p_i = np.min(self.p_i)
        lowest_i = np.argmin(self.p_i)
        assert lowest_p_i >= 0.0, \
            "Negative stationary probability detected."
        if lowest_i == n - 1:
            k_off = lowest_p_i * Q_hat[lowest_i - 1, lowest_i]
        else:
            k_off = lowest_p_i * Q_hat[lowest_i, lowest_i + 1]
        bulk_times = np.ones(p_i_hat.shape) / k_off
    else:
        #negative_unity = np.zeros((len(Q_hat)), dtype=np.longdouble)
        #negative_unity[:] = -1.0
        #bulk_times = la.solve(Q_hat, negative_unity)
        bulk_times = solve_rate_matrix(Q_hat)
    for end_milestone in end_milestones:
        if end_milestone in bulk_milestones:
            continue
        # must account for the removal of bulk state to matrix indices
        no_bulk_index = end_milestone
        for bulk_milestone in bulk_milestones:
            if end_milestone > bulk_milestone:
                no_bulk_index -= 1
        mfpt = bulk_times[no_bulk_index]
        MFPTs[(end_milestone, "bulk")] = mfpt
    # The overall MFPT to bulk is the stationary-probability-weighted
    # average of the per-milestone times.
    MFPT_to_bulk = 0
    assert bulk_times.shape == p_i_hat.shape
    for i, bulk_time in enumerate(bulk_times):
        MFPT_to_bulk += bulk_time * p_i_hat[i]
    # convert to 1/s (the 1.0e12 factor implies times are in
    # picoseconds -- TODO confirm unit convention)
    k_off = 1.0e12 / MFPT_to_bulk
    # Next, compute the MFPTs between different states
    for end_milestone_dest in end_milestones:
        if end_milestone_dest in bulk_milestones:
            continue
        Q_hat = minor2d(self.Q[:], end_milestone_dest, end_milestone_dest)
        #I = np.zeros((len(Q_hat)), dtype = float)
        #I[:] = 1.0
        #end_state_times = la.solve(Q_hat, -I)
        end_state_times = solve_rate_matrix(Q_hat)
        for end_milestone_src in end_milestones:
            if end_milestone_dest == end_milestone_src:
                # don't get the MFPT from a milestone to itself
                continue
            if end_milestone_src in bulk_milestones:
                # a bulk milestone will never be a source
                continue
            mfpt = end_state_times[end_milestone_src]
            MFPTs[(end_milestone_src, end_milestone_dest)] = mfpt
    if self.model.k_on_info:
        #if self.model.get_type() == "elber":
        #    K_hat = make_elber_K_matrix(self.K)
        #    for end_milestone in end_milestones:
        #        K_hat[end_milestone, :] = 0.0
        #        K_hat[end_milestone, end_milestone] = 1.0
        #else:
        #    K_hat = self.K[:,:]
        #    for end_milestone in end_milestones:
        #        K_hat[end_milestone, :] = 0.0
        #        K_hat[end_milestone, end_milestone] = 1.0
        # NOTE(review): K_hat is a basic slice of self.K; for a numpy
        # array this is a view, so the in-place writes below also
        # modify self.K -- confirm this is intended, otherwise a
        # .copy() is needed here.
        K_hat = self.K[:, :]
        # Make every end milestone an absorbing state of K_hat.
        for end_milestone in end_milestones:
            K_hat[end_milestone, :] = 0.0
            K_hat[end_milestone, end_milestone] = 1.0
        p_i_hat = self.p_i[:]
        # n is reused here for the (un-reduced) K_hat dimension.
        n = K_hat.shape[0]
        source_vec = np.zeros((n, 1))
        output_file_glob = os.path.join(
            self.model.anchor_rootdir, 
            self.model.k_on_info.b_surface_directory, 
            self.model.k_on_info.bd_output_glob)
        output_file_list = glob.glob(output_file_glob)
        output_file_list = base.order_files_numerically(output_file_list)
        if len(output_file_list) > 0:
            if self.model.browndye_settings is not None:
                # Parse b-surface BD outputs for source k-ons and
                # transition statistics.
                k_ons_src, k_on_errors_src, reaction_probabilities, \
                    reaction_probability_errors, transition_counts = \
                    browndye_run_compute_rate_constant(os.path.join(
                        self.model.browndye_settings.browndye_bin_dir, 
                        "compute_rate_constant"), output_file_list, 
                        sample_error_from_normal=bd_sample_from_normal)
                self.bd_transition_counts["b_surface"] = transition_counts
            else:
                raise Exception("No valid BD program settings provided.")
            if len(bulk_milestones) > 0:
                bulk_milestone = bulk_milestones[0]
                for bd_milestone in self.model.k_on_info.bd_milestones:
                    bd_results_file = os.path.join(
                        self.model.anchor_rootdir, bd_milestone.directory, 
                        "results.xml")
                    if not os.path.exists(bd_results_file):
                        # No combined results file yet: build it from
                        # the first-hitting-point-distribution output
                        # directories, if any exist.
                        bd_directory_list_glob = os.path.join(
                            self.model.anchor_rootdir, 
                            bd_milestone.directory, 
                            "first_hitting_point_distribution", "lig*/")
                        bd_directory_list = glob.glob(
                            bd_directory_list_glob)
                        if len(bd_directory_list) == 0:
                            continue
                        combine_fhpd_results(
                            bd_milestone, bd_directory_list, 
                            bd_results_file)
                    source_index = bd_milestone.outer_milestone.index
                    source_vec[source_index] = k_ons_src[source_index]
                    results_filename_list = [bd_results_file]
                    transition_probabilities, transition_counts = \
                        browndye_parse_bd_milestone_results(
                            results_filename_list)
                    self.bd_transition_counts[bd_milestone.index] \
                        = transition_counts
                    #src_index = bd_milestone.outer_milestone.index
                    # Overwrite the source row of K_hat with the BD
                    # milestone transition probabilities.
                    K_hat[source_index, :] = 0.0
                    for key in transition_probabilities:
                        value = transition_probabilities[key]
                        if key in ["escaped", "stuck"]:
                            # Escaped/stuck trajectories transition to
                            # no milestone; leave those entries zero.
                            pass
                        else:
                            K_hat[source_index, key] = value
            # Raise K_hat to a large power to approximate the
            # long-time (absorbing) transition behavior.
            K_hat_inf = np.linalg.matrix_power(K_hat, MATRIX_EXPONENTIAL)
            end_k_ons = np.dot(K_hat_inf.T, source_vec)
            for end_milestone in end_milestones:
                k_ons[end_milestone] = end_k_ons[end_milestone]
            self.K_hat = K_hat
            self.k_ons = k_ons
    self.Q_hat = Q_hat
    #self.p_i_hat = p_i_hat # TODO: remove after successful CI test
    self.MFPTs = MFPTs
    self.k_off = k_off
    return
def prepare(self, restart=False, save_state_file=False, 
            save_state_boundaries=False, force_overwrite=False, 
            umbrella_restart_mode=False, swarm_index=None):
    """
    This function gets run before the sim_openmm object is created
    so that the proper paths can be found, etc.

    Parameters:
    -----------
    restart : bool, default False
        Whether this run continues from existing output files.
    save_state_file : bool, default False
        Whether to save state files during the simulation.
    save_state_boundaries : bool, default False
        Whether to save one state for every boundary encountered.
    force_overwrite : bool, default False
        If True, existing outputs are deleted before running.
    umbrella_restart_mode : bool, default False
        Elber-specific: reuse existing umbrella sampling files.
        May not be combined with force_overwrite.
    swarm_index : int or None, default None
        Index of this member of a swarm of simulations, or None if
        not part of a swarm.

    Returns:
    --------
    default_output_filename : str
        Path where the simulation output should be written.
    state_prefix : str or None
        Prefix for saved state files, or None when states aren't
        being saved.
    restart_index : int
        1 for a fresh run; one past the number of existing restart
        files when restarting.
    """
    settings = self.model.openmm_settings
    assert settings is not None, "This model was not prepared for OpenMM."
    restart_index = 1
    # The swarm string distinguishes output files of different swarm
    # members (empty when not in a swarm).
    if swarm_index is None:
        self.swarm_string = ""
    else:
        self.swarm_string = ".swarm_{}".format(swarm_index)
    self.glob = "%s%s*.%s" % (mmvt_base.OPENMMVT_BASENAME, 
                              self.swarm_string, 
                              mmvt_base.OPENMMVT_EXTENSION)
    output_files_glob = os.path.join(
        self.output_directory, self.glob)
    output_restarts_list = glob.glob(output_files_glob)
    if restart:
        output_restarts_list = base.order_files_numerically(
            output_restarts_list)
        assert len(output_restarts_list) > 0, \
            "No simulation has yet been run: cannot use restart mode."
        if self.model.get_type() == "mmvt":
            # Resume the bounce counter from the last output file;
            # fall back to 0 when the file holds no bounce record.
            self.start_bounce_counter = get_last_bounce(
                output_restarts_list[-1])
            if self.start_bounce_counter is None:
                self.start_bounce_counter = 0
        restart_index = len(output_restarts_list) + 1
        default_output_filename = os.path.join(
            self.output_directory, "%s%s.restart%d.%s" % (
                self.basename, self.swarm_string, restart_index, 
                self.extension))
    else:
        if len(output_restarts_list) > 0:
            if not force_overwrite and not umbrella_restart_mode:
                # NOTE(review): "replace by" looks like a typo for
                # "replaced by" in this user-facing message.
                print("This anchor already has existing output files "\
                      "and the entered command would overwrite them. "\
                      "If you desire to overwrite the existing files, "\
                      "then use the --force_overwrite (-f) option, and "\
                      "all outputs will be deleted and replace by a new "\
                      "run.")
                raise Exception("Cannot overwrite existing outputs.")
            elif force_overwrite:
                cleanse_anchor_outputs(self.model, self.anchor)
            else:
                # umbrella_restart_mode: delete outputs but keep the
                # umbrella sampling files for reuse.
                cleanse_anchor_outputs(self.model, self.anchor, 
                                       skip_umbrella_files=True)
        # check if umbrellas exist
        if self.model.get_type() == "elber":
            anchor_has_umbrella_files = elber_anchor_has_umbrella_files(
                self.model, self.anchor)
            assert not force_overwrite or not umbrella_restart_mode, \
                "The options force_overwrite and umbrella_restart_mode "\
                "may not both be activated at the same time."
            if umbrella_restart_mode:
                assert anchor_has_umbrella_files, "Cannot use umbrella "\
                    "restart mode if umbrella files don't exist for "\
                    "anchor {}.".format(self.anchor.index)
            if anchor_has_umbrella_files and (not force_overwrite \
                    or umbrella_restart_mode):
                self.umbrellas_already_exist_mode = True
        default_output_filename = os.path.join(
            self.output_directory, "%s%s.restart%d.%s" % (
                self.basename, self.swarm_string, 1, self.extension))
    state_dir = os.path.join(self.output_directory, SAVE_STATE_DIRECTORY)
    self.save_one_state_for_all_boundaries = save_state_boundaries
    if self.save_one_state_for_all_boundaries:
        if not os.path.exists(state_dir):
            os.mkdir(state_dir)
    self.state_prefix = os.path.join(state_dir, SAVE_STATE_PREFIX)
    if save_state_file:
        state_prefix = self.state_prefix
        self.save_all_states = True
        if not os.path.exists(state_dir):
            os.mkdir(state_dir)
    else:
        state_prefix = None
        self.save_all_states = False
    restart_checkpoint_basename \
        = RESTART_CHECKPOINT_FILENAME + self.swarm_string
    self.restart_checkpoint_filename = os.path.join(
        self.output_directory, restart_checkpoint_basename)
    return default_output_filename, state_prefix, restart_index
def parse_browndye_results(self, bd_sample_from_normal=False):
    """
    Parse Browndye2 output files to fill out the milestoning model.

    Parameters:
    -----------
    bd_sample_from_normal : bool, default False
        If set to True, then k-on quantities will have a random
        fluctuation introduced in a magnitude proportional to k-on
        errors. This is used only for error estimations.
    """
    b_surface_output_file_glob = os.path.join(
        self.model.anchor_rootdir, 
        self.model.k_on_info.b_surface_directory, 
        self.model.k_on_info.bd_output_glob)
    output_file_list = glob.glob(b_surface_output_file_glob)
    output_file_list = base.order_files_numerically(output_file_list)
    if len(output_file_list) > 0:
        if self.model.browndye_settings is not None:
            # Run compute_rate_constant on the b-surface outputs to
            # obtain source k-ons and transition statistics.
            k_ons_src, k_on_errors_src, reaction_probabilities, \
                reaction_probability_errors, transition_counts = \
                browndye_run_compute_rate_constant(os.path.join(
                    self.model.browndye_settings.browndye_bin_dir, 
                    "compute_rate_constant"), output_file_list, 
                    sample_error_from_normal=bd_sample_from_normal)
            self.bd_transition_counts["b_surface"] = transition_counts
            self.bd_transition_probabilities["b_surface"] \
                = reaction_probabilities
            self.b_surface_k_ons_src = k_ons_src
            # NOTE(review): the doubled "b_surface_b_surface_" prefix
            # looks like a typo, but renaming this public attribute
            # could break external readers -- confirm before changing.
            self.b_surface_b_surface_k_on_errors_src = k_on_errors_src
        else:
            raise Exception("No valid BD program settings provided.")
    if len(self.model.k_on_info.bd_milestones) > 0 \
            and len(output_file_list) > 0:
        for bd_milestone in self.model.k_on_info.bd_milestones:
            # Per-BD-milestone counts/probabilities keyed by milestone
            # index, plus the special keys "escaped" and "total".
            transition_counts_bd_milestone = defaultdict(int)
            transition_probabilities_bd_milestone = defaultdict(float)
            inner_milestone_index = bd_milestone.inner_milestone.index
            outer_milestone_index = bd_milestone.outer_milestone.index
            assert inner_milestone_index in transition_counts
            assert outer_milestone_index in transition_counts
            transition_counts_bd_milestone[inner_milestone_index] \
                = transition_counts[inner_milestone_index]
            # Trajectories that reached the outer milestone but never
            # the inner one are counted as escaped.
            transition_counts_bd_milestone["escaped"] \
                = transition_counts[outer_milestone_index] \
                - transition_counts[inner_milestone_index]
            transition_counts_bd_milestone["total"] \
                = transition_counts[outer_milestone_index]
            if transition_counts_bd_milestone["escaped"] == 0:
                # Nothing escaped: all probability goes to the inner
                # milestone (also avoids a 0/0 when total == inner).
                transition_probabilities_bd_milestone[
                    inner_milestone_index] = 1.0
                transition_probabilities_bd_milestone["escaped"] \
                    = 0
            else:
                transition_probabilities_bd_milestone[
                    inner_milestone_index] \
                    = transition_counts_bd_milestone[
                        inner_milestone_index] \
                    / transition_counts_bd_milestone["total"]
                transition_probabilities_bd_milestone["escaped"] \
                    = transition_counts_bd_milestone["escaped"] \
                    / transition_counts_bd_milestone["total"]
            self.bd_transition_counts[bd_milestone.index] \
                = transition_counts_bd_milestone
            self.bd_transition_probabilities[bd_milestone.index] \
                = transition_probabilities_bd_milestone
    return