def _reverse_bonded_name(name):
    """Return a bonded type name with its particle sequence reversed.

    For example, "bb_bb_sc" becomes "sc_bb_bb".
    """
    return "_".join(reversed(name.split("_")))


def _resolve_bonded_variable(cgmodel, var_name, nbins):
    """
    Look up what is needed to evaluate one bonded variable on a trajectory.

    The number of particles in var_name selects the kind of variable:
    2 particles is a bond length, 3 a bond angle, and 4 a torsion.
    This only depends on the model, so it is done once rather than per file.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param var_name: particle sequence of the bonded parameter (e.g. "bb_bb_bb")
    :type var_name: str

    :param nbins: number of histogram bins for this variable
    :type nbins: int

    :returns:
       - compute_func - mdtraj function used to evaluate the variable
       - indices - particle index array of the matching bonded type
       - to_degrees ( bool ) - whether the computed values must be converted from radians
       - bin_edges ( np.ndarray ) - nbins+1 histogram bin edges
       - label ( str ) - axis label for plotting

    :raises ValueError: if var_name does not contain 2, 3 or 4 particles, or
        if no bonded type in the model matches var_name (in either direction).
    """

    n_particles = var_name.count('_') + 1

    # The type name stored for the model may list the particles in either order:
    candidate_names = (var_name, _reverse_bonded_name(var_name))

    if n_particles == 2:
        # Bond
        member_list = CGModel.get_bond_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = \
            assign_bond_types(cgmodel, member_list)
        compute_func = md.compute_distances
        to_degrees = False
        label = f'{var_name} distance ({unit.nanometer})'

    elif n_particles == 3:
        # Angle
        member_list = CGModel.get_bond_angle_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = \
            assign_angle_types(cgmodel, member_list)
        compute_func = md.compute_angles
        to_degrees = True
        label = f'{var_name} angle (degrees)'

    elif n_particles == 4:
        # Torsion
        member_list = CGModel.get_torsion_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = \
            assign_torsion_types(cgmodel, member_list)
        compute_func = md.compute_dihedrals
        to_degrees = True
        label = f'{var_name} angle (degrees)'

    else:
        raise ValueError(
            f"'{var_name}' has {n_particles} particles; expected 2 (bond), "
            f"3 (angle) or 4 (torsion)"
        )

    # Type keys are the 1-based type index stored as a string.
    # If several types matched, the last one is used (as in the original loop).
    indices = None
    for i in range(n_types):
        type_key = str(i + 1)
        if inv_dict[type_key] in candidate_names:
            indices = sub_arrays[type_key]

    if indices is None:
        raise ValueError(f"No bonded type matching '{var_name}' was found in the model")

    if n_particles == 2:
        # Bin edges should be the same across all files, so use a heuristic
        # range of +/- 50% around the equilibrium bond length:
        b_eq = cgmodel.get_bond_length(indices[0])
        b_min = 0.5*b_eq.value_in_unit(unit.nanometer)
        b_max = 1.5*b_eq.value_in_unit(unit.nanometer)
        bin_edges = np.linspace(b_min, b_max, nbins+1)
    elif n_particles == 3:
        bin_edges = np.linspace(0, 180, nbins+1)
    else:
        bin_edges = np.linspace(-180, 180, nbins+1)

    return compute_func, indices, to_degrees, bin_edges, label


def calc_2d_distribution(
    cgmodel,
    file_list,
    nbin_xvar=180,
    nbin_yvar=180,
    frame_start=0,
    frame_stride=1,
    frame_end=-1,
    plotfile="2d_hist.pdf",
    xvar_name = "bb_bb_bb",
    yvar_name = "bb_bb_bb_bb",
    colormap="nipy_spectral",
    temperature_list=None,
    ):
    """
    Calculate and plot 2d histogram for any 2 bonded variables,
    given a CGModel object and pdb or dcd trajectory.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string
    :type file_list: str or list(str)

    :param nbin_xvar: number of bins for x bonded variable
    :type nbin_xvar: int

    :param nbin_yvar: number of bins for y bonded variable
    :type nbin_yvar: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: End of the frame slice used for analysis. The default of -1 means "through the last frame" and is applied to each trajectory file separately.
    :type frame_end: int

    :param plotfile: Filename for saving the 2d histogram pdf plots
    :type plotfile: str

    :param xvar_name: particle sequence of the x bonded parameter (default="bb_bb_bb")
    :type xvar_name: str

    :param yvar_name: particle sequence of the y bonded parameter (default="bb_bb_bb_bb")
    :type yvar_name: str

    :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral')
    :type colormap: str (case sensitive)

    :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used.
    :type temperature_list: list(Quantity())

    :returns:
       - hist_data ( dict )
       - xedges ( dict )
       - yedges ( dict )

    :raises ValueError: if xvar_name or yvar_name is not a 2-, 3- or 4-particle
        sequence, or does not match any bonded type in the model.
    """

    # Convert file_list to list if a single string:
    if isinstance(file_list, str):
        # Single file
        file_list = file_list.split()

    # Resolve both bonded variables once - this only depends on the model:
    x_compute, x_indices, x_to_degrees, xvar_bin_edges, xlabel = \
        _resolve_bonded_variable(cgmodel, xvar_name, nbin_xvar)
    y_compute, y_indices, y_to_degrees, yvar_bin_edges, ylabel = \
        _resolve_bonded_variable(cgmodel, yvar_name, nbin_yvar)

    # Since the bonded variables may have different numbers of observables, we use all
    # combinations of the 2 parameter observables to create the histograms.
    # Store the combined values by filename for computing a global colormap.
    xvar_val_array_combo = {}
    yvar_val_array_combo = {}

    for file in file_list:

        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file, top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)

        # Select frames for analysis.
        # The end frame is resolved per file - overwriting frame_end here would
        # silently truncate later files to the length of the first one.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Each of these is an [n_frames x n_occurrences] array:
        xvals = x_compute(traj, x_indices)
        if x_to_degrees:
            xvals *= (180/np.pi)

        yvals = y_compute(traj, y_indices)
        if y_to_degrees:
            yvals *= (180/np.pi)

        n_occ_x = xvals.shape[1]
        n_occ_y = yvals.shape[1]

        # x value arrays are [xval0_y0, xval1_y0, ...xvaln_y0, ... xval0_yn, xval1_yn, xvaln_yn]
        # y value arrays are [yval0_x0, yval0_x1, ...yval0_xn, ... yvaln_x0, yvaln_x1, yvaln_xn]
        # Cast to float64 to match the arrays the element-wise version filled in.
        x_combo = np.tile(xvals, (1, n_occ_y)).astype(np.float64)
        y_combo = np.repeat(yvals, n_occ_x, axis=1).astype(np.float64)

        # Reshape arrays to single columns for histogramming:
        xvar_val_array_combo[file] = x_combo.reshape(-1, 1)
        yvar_val_array_combo[file] = y_combo.reshape(-1, 1)

    # 2d histogram the data and plot:
    hist_data, xedges, yedges = plot_2d_distribution(
        file_list, xvar_val_array_combo, yvar_val_array_combo, xvar_bin_edges, yvar_bin_edges,
        plotfile, colormap, xlabel, ylabel, temperature_list=temperature_list)

    return hist_data, xedges, yedges
def calc_bond_length_distribution(
    cgmodel,
    file_list,
    nbins=90,
    frame_start=0,
    frame_stride=1,
    frame_end=-1,
    plot_per_page=2,
    temperature_list=None,
    plotfile="bond_hist.pdf",
):
    """
    Calculate and plot all bond length distributions from a CGModel object and trajectory

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s)
    :type file_list: str or list(str)

    :param nbins: number of histogram bins
    :type nbins: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: End of the frame slice used for analysis. The default of -1 means "through the last frame" and is applied to each trajectory file separately.
    :type frame_end: int

    :param plot_per_page: number of subplots to display on each page (default=2)
    :type plot_per_page: int

    :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels.
    :type temperature_list: list(Quantity())

    :param plotfile: filename for saving bond length distribution pdf plots
    :type plotfile: str

    :returns:
       - bond_hist_data ( dict ) - keyed by temperature (in kelvin, 2 decimals) or by file name without its last 4 characters; each entry holds "<bond type>_density" and "<bond type>_bin_centers" arrays
    """

    # Convert file_list to list if a single string:
    if isinstance(file_list, str):
        # Single file
        file_list = file_list.split()

    # Create dictionary for saving bond histogram data:
    bond_hist_data = {}

    # Get bond list
    bond_list = CGModel.get_bond_list(cgmodel)

    # Assign bond types (only the per-type index arrays, type count and
    # type-index -> name mapping are needed here):
    _, _, bond_sub_arrays, _, i_bond_type, _, inv_bond_dict = \
        assign_bond_types(cgmodel, bond_list)

    for file_index, file in enumerate(file_list):

        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file, top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)

        # Select frames for analysis.
        # The end frame is resolved per file - overwriting frame_end here would
        # silently truncate later files to the length of the first one.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Create inner dictionary for current file:
        if temperature_list is not None:
            file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}"
        else:
            file_key = file[:-4]

        file_hist_data = {}
        bond_hist_data[file_key] = file_hist_data

        for i in range(i_bond_type):
            # Type keys are the 1-based type index stored as a string
            type_key = str(i + 1)

            # Compute all bond distances in trajectory
            # This returns an [nframes x n_bonds] array, flattened to one column:
            bond_val_array = md.compute_distances(traj, bond_sub_arrays[type_key])
            bond_val_array = np.reshape(bond_val_array, (-1, 1))

            # Histogram the results:
            n_out, bin_edges_out = np.histogram(bond_val_array, bins=nbins, density=True)

            # Bin centers are the midpoints of consecutive edges, as a float64 column:
            bond_bin_centers = (
                (bin_edges_out[:-1] + bin_edges_out[1:]) / 2
            ).astype(np.float64).reshape(-1, 1)

            file_hist_data[f"{inv_bond_dict[type_key]}_density"] = n_out
            file_hist_data[f"{inv_bond_dict[type_key]}_bin_centers"] = bond_bin_centers

    plot_distribution(
        inv_bond_dict,
        bond_hist_data,
        xlabel="Bond length (nm)",
        ylabel="Probability density",
        figure_title="Bond distributions",
        file_name=f"{plotfile}",
        plot_per_page=plot_per_page,
    )

    return bond_hist_data