def test_3nch_serial_resSeq():
    """Check ``serial``/``resSeq`` values of 3nch after a dataframe round trip."""
    # If you use zero-based indexing, this PDB has quite large gaps in residue
    # and atom numbering, so it's a good test case.  See #528
    # Gold standard values obtained via
    # cat 3nch.pdb |grep ATM|tail -n 5
    # HETATM19787  S   SO4 D 804      -4.788  -9.395  22.515  1.00121.87           S
    # HETATM19788  O1  SO4 D 804      -3.815  -9.511  21.425  1.00105.97           O
    # HETATM19789  O2  SO4 D 804      -5.989  -8.733  21.999  1.00116.13           O
    # HETATM19790  O3  SO4 D 804      -5.130 -10.726  23.043  1.00108.74           O
    # HETATM19791  O4  SO4 D 804      -4.210  -8.560  23.575  1.00112.54           O
    t1 = load_pdb(get_fn('3nch.pdb.gz'))
    top, bonds = t1.top.to_dataframe()

    # The topology must survive a round trip through its dataframe form.
    top2 = Topology.from_dataframe(top, bonds)
    eq(t1.top, top2)

    # Index by the actual serial numbers stored in the PDB.  The lookups below
    # are label based, so use ``.loc`` (``DataFrame.ix`` no longer exists in
    # current pandas releases).
    top = top.set_index('serial')
    eq(str(top.loc[19791]["name"]), "O4")
    eq(str(top.loc[19787]["name"]), "S")
    eq(str(top.loc[19787]["resName"]), "SO4")
    eq(int(top.loc[19787]["resSeq"]), 804)
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Every CG site (bead) is positioned by calling the selected mapping
    function with the coordinates and masses of the atoms listed for that
    bead.  When ``trj`` carries forces they are mapped per bead with
    ``map_forces``.

    Parameters
    ----------
    trj : md.Trajectory
        Trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        Labels of the CG sites, one per bead.  Each label must be a string of
        1 to 4 characters; labels are upper-cased before use.
    chain_list : list, optional
        Chain id's used to split the resulting beads into separate chains.
        Every bead is put in chain 1 when omitted.
    segment_id_list : list, optional
        Segment id's to assign to the cg residues.
    resSeq_list : list, optional
        Residue sequence id's to assign to the cg residues.  Bead ``i`` gets
        ``i + 1`` when omitted.
    inplace : bool, default=False
        If ``True``, the topology and coordinates of ``trj`` are replaced and
        ``trj`` is returned; the mapped forces are not stored in that case.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    mapping_function : string, default='com'
        How to map xyz coordinates.  Must be one of the keys of the
        module-level ``mapping_options`` dictionary.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``.

    Raises
    ------
    ValueError
        If the per-bead lists differ in length, a bead label is invalid, or
        ``mapping_function`` is not a key of ``mapping_options``.
    """
    n_beads = len(atom_indices_list)

    if n_beads != len(bead_label_list):
        raise ValueError(
            "Must supply a list of bead labels of the same length as a list of selected atom indices"
        )
    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            # The one-element tuple keeps the formatting valid even when the
            # offending label is itself a tuple.
            raise ValueError(
                "Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"
                % (bead_label,))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" %
                         mapping_options.keys())
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(n_beads, dtype=int)
    elif len(chain_list) != n_beads:
        raise ValueError(
            "Supplied chain_list must be of the same length as a list of selected atom indices"
        )
    if segment_id_list is not None and len(segment_id_list) != n_beads:
        raise ValueError(
            "Supplied segment_id_list must be of the same length as a list of selected atom indices"
        )
    if resSeq_list is not None and len(resSeq_list) != n_beads:
        raise ValueError(
            "Supplied resSeq_list must be of the same length as a list of selected atom indices"
        )

    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Gather the masses of the atoms in each bead once; the per-bead totals
    # and the mapping weights are both derived from these arrays.
    # NOTE(review): the masses are collected in topology order, not in the
    # order given in ``atom_indices``; confirm the mapping functions do not
    # rely on the two lining up for unsorted index lists.
    atoms = list(trj.top.atoms)
    bead_masses = []
    for atom_indices in atom_indices_list:
        members = set(atom_indices)
        bead_masses.append(
            np.array([a.mass for a in atoms if a.index in members],
                     dtype=np.float64))
    masses = np.array([m.sum() for m in bead_masses], dtype=np.float64)

    topology_labels = []
    element_label_dict = {}
    # Scratch buffer handed to the mapping function for one bead at a time.
    xyz_i = np.zeros((n_frames, n_dim), dtype=trj.xyz.dtype, order='C')
    has_forces = "forces" in trj.__dict__ and len(trj.forces) > 0

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]

        map_coords(xyz_i, trj.xyz, atom_indices, bead_masses[i],
                   unitcell_lengths=trj.unitcell_lengths)
        xyz[:, i, :] = xyz_i

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        resSeq = resSeq_list[i] if resSeq_list is not None else i + 1

        # Beads sharing a label share a pseudo-element symbol "B<k>", where k
        # counts the distinct labels seen so far (modulo 10).
        if bead_label not in element_label_dict:
            element_label_dict[bead_label] = '%2s' % (
                'B%i' % (len(element_label_dict) % 10))
        element_label = element_label_dict[bead_label]

        # Create the pseudo-element the first time its symbol is needed
        # (presumably constructing it registers it with the element table).
        if element_label.strip().upper(
        ) not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq, '%3s' % bead_label,
            chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, split_shared_atoms=False, mod_weights_list=None,
                mapping_function="com", charge_tol=1e-5,
                center_postwrap=False):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Every CG site (bead) is positioned by ``compute_center_weighted`` from the
    coordinates of the atoms listed for that bead, using weights chosen by
    ``mapping_function``.  When ``trj`` carries usable forces they are mapped
    per bead with ``map_forces``.

    Parameters
    ----------
    trj : md.Trajectory
        Trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        Labels of the CG sites, one per bead.  Each label must be a string of
        1 to 4 characters; labels are upper-cased before use.
    chain_list : list, optional
        Chain id's used to split the resulting beads into separate chains.
        Every bead is put in chain 1 when omitted.
    segment_id_list : list, optional
        Segment id's to assign to the cg residues.
    resSeq_list : list, optional
        Residue sequence id's to assign to the cg residues.  Bead ``i`` gets
        ``i + 1`` when omitted.
    inplace : bool, default=False
        If ``True``, the topology and coordinates of ``trj`` are replaced and
        ``trj`` is returned; the mapped forces are not stored in that case.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    split_shared_atoms : bool, default=False
        If specified, check to see if atoms are shared per molecule in beads.
        If so, equally divide their weight accordingly for each bead.  The
        modifiers come from ``gen_unique_overlap_mod_weights`` and replace
        any ``mod_weights_list`` that was passed in.
    mod_weights_list : list of array-like, optional
        Per-bead factors multiplied element-wise into the mapping weights.
    mapping_function : string, default='com'
        How to map xyz coordinates.  Must be a key of the module-level
        ``mapping_options``; the names handled here are 'com' /
        'center_of_mass', 'coc' / 'center_of_charge', 'cof' /
        'center_of_force' and 'center' (unit weights).
    charge_tol : float, default=1e-5
        For center-of-charge mapping, a bead whose absolute total charge is
        below this value is rejected.
    center_postwrap : bool, default=False
        Whether to wrap the CG system after it is mapped. Assumes that box is
        centered at 0, and only has effect if periodic information is
        present.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``.

    Raises
    ------
    ValueError
        If the per-bead lists differ in length, a bead label is invalid,
        ``mapping_function`` is unknown or not handled, charges are required
        but missing, or a bead has near-zero total charge under
        center-of-charge mapping.
    """
    n_beads = len(atom_indices_list)

    if n_beads != len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the "
                         "same length as a list of selected atom indices")
    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            # The one-element tuple keeps the formatting valid even when the
            # offending label is itself a tuple.
            raise ValueError("Specified bead label '%s' is not valid, "
                             "must be a string between 1 and 4 characters"
                             % (bead_label,))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s"
                         % mapping_options.keys())

    use_charge = mapping_function in ('coc', 'center_of_charge')
    use_mass = mapping_function in ('com', 'center_of_mass')
    use_force = mapping_function in ('cof', 'center_of_force')
    use_unit = mapping_function == 'center'

    if chain_list is None:
        chain_list = np.ones(n_beads, dtype=int)
    elif len(chain_list) != n_beads:
        raise ValueError("Supplied chain_list must be of the same length "
                         "as a list of selected atom indices")
    if segment_id_list is not None and len(segment_id_list) != n_beads:
        raise ValueError("Supplied segment_id_list must be of the same "
                         "length as a list of selected atom indices")
    if resSeq_list is not None and len(resSeq_list) != n_beads:
        raise ValueError("Supplied resSeq_list must be of the same "
                         "length as a list of selected atom indices")

    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Element masses of the atoms in each bead, in the order the indices were
    # given, plus the total mass of each bead.
    masses_i = [
        np.array([trj.top.atom(jj).element.mass for jj in atom_indices],
                 dtype=np.float64)
        for atom_indices in atom_indices_list
    ]
    masses = np.array([m.sum() for m in masses_i], dtype=np.float64)

    # Charges are only needed for center-of-charge mapping.
    charges_i = []
    if use_charge:
        if not all(
                hasattr(trj.top.atom(jj), 'charge')
                for atom_indices in atom_indices_list
                for jj in atom_indices):
            raise ValueError("Center of charge mapping requires a 'charge' "
                             "attribute on every mapped atom")
        charges_i = [
            np.array([trj.top.atom(jj).charge for jj in atom_indices],
                     dtype=np.float64)
            for atom_indices in atom_indices_list
        ]
        for site, bead_charges in enumerate(charges_i):
            if np.absolute(bead_charges.sum()) < charge_tol:
                raise ValueError("Total charge on site %i is near zero" %
                                 site)

    forcenorm_i = []
    if use_force:
        forcenorm_i = [
            get_forcenorms(trj, atom_indices)
            for atom_indices in atom_indices_list
        ]

    topology_labels = []
    element_label_dict = {}

    if split_shared_atoms:
        mod_weights_list = gen_unique_overlap_mod_weights(atom_indices_list)

    # Probe the forces once: a missing ``forces`` attribute means there is
    # nothing to map, a TypeError means the stored forces are unusable.
    has_forces = False
    try:
        trj.__dict__['forces']
        map_forces(trj, (0, ))
        has_forces = True
    except TypeError:
        print("WARNING: Invalid Forces\nNo Map applied to forces")
    except KeyError:
        pass
    except Exception:
        print("Unknown error, check your forces\nexiting...")
        raise

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]
        # View into the output array; it is passed as the first argument to
        # compute_center_weighted below.
        xyz_i = xyz[:, i, :]

        if use_charge:
            weights = charges_i[i]
        elif use_mass:
            weights = masses_i[i]
        elif use_force:
            weights = forcenorm_i[i]
        elif use_unit:
            weights = np.ones(len(atom_indices))
        else:
            raise ValueError(
                "Mapping function '%s' is not handled by cg_by_index" %
                mapping_function)

        if mod_weights_list is not None:
            weights[:] = np.multiply(weights, mod_weights_list[i])

        compute_center_weighted(xyz_i, trj.xyz, atom_indices, weights,
                                unitcell_lengths=trj.unitcell_lengths,
                                center_postwrap=center_postwrap)

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        resSeq = resSeq_list[i] if resSeq_list is not None else i + 1

        # Beads sharing a label share a pseudo-element symbol "B<k>", where k
        # counts the distinct labels seen so far (modulo 10).
        if bead_label not in element_label_dict:
            element_label_dict[bead_label] = '%2s' % (
                'B%i' % (len(element_label_dict) % 10))
        element_label = element_label_dict[bead_label]

        # Create the pseudo-element the first time its symbol is needed
        # (presumably constructing it registers it with the element table).
        if element_label.strip().upper(
        ) not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq, '%3s' % bead_label,
            chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def map_molecules(trj, selection_list, bead_label_list, transfer_labels=False,
                  molecule_types=None, molecule_type_order=False,
                  return_call=False, *args, **kwargs):
    """Map a trajectory in which every molecule (residue) has an assigned type.

    The selections of each molecule type are resolved once, on the first
    molecule of that type, into molecule-local atom indices.  Those local
    indices are then shifted to every residue of that type to build the
    per-bead index lists handed to ``cg_by_index``.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to map.
    selection_list : Indexible collection of collections of strings
        One collection of selection strings per molecule type, one string per
        bead.  A string must select either by ``index`` or by ``name``, not
        both.
    bead_label_list : Indexible collection
        Bead labels, with the same nesting as ``selection_list``.
    transfer_labels : boolean
        Whether to transfer over labels in ``trj``.  Moves over resSeq and
        resName for every bead, assuming that the atoms in each bead are
        uniform in those qualities.
    molecule_types : Indexible collection of integers
        Type of each residue.  Every residue is type 0 when omitted.
    molecule_type_order : boolean
        Specifying molecule_type_order means that the map will be reordered
        so that all molecules of type 0 come first, then 1, etc.  Not
        currently supported: passing ``True`` raises ``ValueError``.
    return_call : boolean
        Whether to return the arguments that cg_by_index would be called with
        instead of actually calling it. Useful for modifying the call.
    *args, **kwargs
        Forwarded to ``cg_by_index``.

    Returns
    -------
    traj : trajectory
        Trajectory formed by applying given molecular map.
    -OR-
    list
        ``[trj, index_list, label_list, *args, kwargs]`` when ``return_call``
        is ``True``.

    Raises
    ------
    ValueError
        On inconsistent arguments or when a selection matches no atoms.
    """
    ### First, deal with optional arguments and argument validation.
    if molecule_type_order is True:
        raise ValueError("molecule_type_order not currently supported.")

    # If the molecule types aren't given, assume 1 molecule type.
    if molecule_types is None:
        molecule_types = [0] * trj.top.n_residues
    else:
        # ``.index`` is used below, so normalise other sequences to a list.
        molecule_types = list(molecule_types)

    n_molecule_types = len(selection_list)

    if sorted(set(molecule_types)) != list(range(n_molecule_types)):
        raise ValueError("Error in map molecules, molecule types list must "
                         "contain only and all numbers from 0 to "
                         "n_molecule_types-1.")
    # NOTE: the length of molecule_types is deliberately not checked against
    # the number of residues.
    if len(selection_list) != len(bead_label_list):
        raise ValueError("Error in map molecules, must submit selection list "
                         "and bead label list of same length.")
    for i in range(n_molecule_types):
        if len(selection_list[i]) != len(bead_label_list[i]):
            raise ValueError("Error in map molecules, selection list %i and "
                             "bead label list %i must be of same length." %
                             (i, i))

    ### Generate the indices local to each molecule for mapping.
    # Residue index of the first molecule of each type.
    first_molecules = [
        molecule_types.index(i) for i in range(n_molecule_types)
    ]

    internal_indices_list = []
    for selection, first_mol in zip(selection_list, first_molecules):
        first_index = trj.top.select("(resid == %i)" % first_mol).min()
        mol_indices = []
        for sel in selection:
            has_index = "index" in sel
            has_name = "name" in sel
            internal_indices = []
            if has_index and has_name:
                raise ValueError("Error in map molecules, do not specify "
                                 "selection by index and by type.")
            elif has_index:
                # The index selection is evaluated on the whole system and
                # used as-is as the molecule-local indices.
                internal_indices = trj.top.select("%s" % sel)
            elif has_name:
                # Un-shift to molecule-local indices; the residue start is
                # added back when the beads are laid out below.
                filter_string = "(resid == %i) and (%s)" % (first_mol, sel)
                internal_indices = trj.top.select(filter_string) - first_index

            if len(internal_indices) == 0:
                raise ValueError(
                    "Error in map_molecules, selection string '%s' "
                    "produced an empty list of atom indices" % sel)
            mol_indices.append(internal_indices)
        internal_indices_list.append(mol_indices)

    # Get a list of type [ (0,r0), (1,r1) etc ].
    if molecule_type_order is True:
        # Unreachable while molecule_type_order is rejected above; kept for
        # when that option becomes supported.
        residue_list = sorted(enumerate(trj.top.residues),
                              key=lambda x: molecule_types[x[0]])
    else:
        residue_list = enumerate(trj.top.residues)

    index_list = []
    label_list = []
    start_index = 0
    for ridx, r in residue_list:
        molecule_type = molecule_types[ridx]
        for bead_idx, internal_indices in enumerate(
                internal_indices_list[molecule_type]):
            index_list.append(internal_indices + start_index)
            label_list.append(bead_label_list[molecule_type][bead_idx])
        start_index = start_index + r.n_atoms

    if return_call is True:
        # Exit early with the arguments of the cg_by_index call.
        arg_list = [trj, index_list, label_list]
        arg_list.extend(args)
        arg_list.append(kwargs)
        return arg_list

    cg_trj = cg_by_index(trj, index_list, label_list, *args, **kwargs)

    # Do a more sophisticated labeling.
    if transfer_labels is True:
        df_aa_top = trj.top.to_dataframe()[0]
        df_cg_top, cg_bonds = cg_trj.top.to_dataframe()

        # Get resSeq info.
        aa_resSeq = df_aa_top.loc[:, 'resSeq']

        # Find atom indices for the first atom of each distinct resSeq value.
        res_starting_indices = np.sort(
            np.unique(aa_resSeq, return_index=True)[1])

        # Get resids and resnames for the starting atoms.
        aa_starting_resids = df_aa_top.loc[res_starting_indices, 'resSeq']
        aa_starting_resnames = df_aa_top.loc[res_starting_indices, 'resName']

        # Needed for duplicating atomistic info across cg molecules.
        n_sites_per_cg = [len(desc) for desc in bead_label_list]

        # Generate and place resids.
        cg_resids = typed_elementwise_rep(aa_starting_resids, molecule_types,
                                          n_sites_per_cg)
        df_cg_top.loc[:, "resSeq"] = cg_resids

        # Generate and place resNames.
        cg_resnames = typed_elementwise_rep(aa_starting_resnames,
                                            molecule_types, n_sites_per_cg)
        df_cg_top.loc[:, "resName"] = cg_resnames

        # Convert and put back, keeping any bonds of the mapped topology.
        cg_trj.top = Topology.from_dataframe(
            df_cg_top, bonds=cg_bonds if len(cg_bonds) > 0 else None)

    return cg_trj
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Every CG site (bead) is positioned by calling the selected mapping
    function with the coordinates and masses of the atoms listed for that
    bead.  When ``trj`` carries forces they are mapped per bead with
    ``map_forces``.

    Parameters
    ----------
    trj : md.Trajectory
        Trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        Labels of the CG sites, one per bead.  Each label must be a string of
        1 to 4 characters; labels are upper-cased before use.
    chain_list : list, optional
        Chain id's used to split the resulting beads into separate chains.
        Every bead is put in chain 1 when omitted.
    segment_id_list : list, optional
        Segment id's to assign to the cg residues.
    resSeq_list : list, optional
        Residue sequence id's to assign to the cg residues.  Bead ``i`` gets
        ``i + 1`` when omitted.
    inplace : bool, default=False
        If ``True``, the topology and coordinates of ``trj`` are replaced and
        ``trj`` is returned; the mapped forces are not stored in that case.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    mapping_function : string, default='com'
        How to map xyz coordinates.  Must be one of the keys of the
        module-level ``mapping_options`` dictionary.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``.

    Raises
    ------
    ValueError
        If the per-bead lists differ in length, a bead label is invalid, or
        ``mapping_function`` is not a key of ``mapping_options``.
    """
    n_beads = len(atom_indices_list)

    if n_beads != len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the same length as a list of selected atom indices")
    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            # The one-element tuple keeps the formatting valid even when the
            # offending label is itself a tuple.
            raise ValueError("Specified bead label '%s' is not valid, must be a string between 1 and 4 characters" % (bead_label,))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" % mapping_options.keys())
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(n_beads, dtype=int)
    elif len(chain_list) != n_beads:
        raise ValueError("Supplied chain_list must be of the same length as a list of selected atom indices")

    if segment_id_list is not None and len(segment_id_list) != n_beads:
        raise ValueError("Supplied segment_id_list must be of the same length as a list of selected atom indices")

    if resSeq_list is not None and len(resSeq_list) != n_beads:
        raise ValueError("Supplied resSeq_list must be of the same length as a list of selected atom indices")

    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Gather the masses of the atoms in each bead once; the per-bead totals
    # and the mapping weights are both derived from these arrays.
    # NOTE(review): the masses are collected in topology order, not in the
    # order given in ``atom_indices``; confirm the mapping functions do not
    # rely on the two lining up for unsorted index lists.
    atoms = list(trj.top.atoms)
    bead_masses = []
    for atom_indices in atom_indices_list:
        members = set(atom_indices)
        bead_masses.append(
            np.array([a.mass for a in atoms if a.index in members],
                     dtype=np.float64))
    masses = np.array([m.sum() for m in bead_masses], dtype=np.float64)

    topology_labels = []
    element_label_dict = {}
    # Scratch buffer handed to the mapping function for one bead at a time.
    xyz_i = np.zeros((n_frames, n_dim), dtype=trj.xyz.dtype, order='C')
    has_forces = "forces" in trj.__dict__ and len(trj.forces) > 0

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]

        map_coords(xyz_i, trj.xyz, atom_indices, bead_masses[i],
                   unitcell_lengths=trj.unitcell_lengths)
        xyz[:, i, :] = xyz_i

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        resSeq = resSeq_list[i] if resSeq_list is not None else i + 1

        # Beads sharing a label share a pseudo-element symbol "B<k>", where k
        # counts the distinct labels seen so far (modulo 10).
        if bead_label not in element_label_dict:
            element_label_dict[bead_label] = '%2s' % ('B%i' % (len(element_label_dict) % 10))
        element_label = element_label_dict[bead_label]

        # Create the pseudo-element the first time its symbol is needed
        # (presumably constructing it registers it with the element table).
        if element_label.strip().upper() not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label, masses[i], 1.0)

        topology_labels.append([i, bead_label, element_label, resSeq, '%3s' % bead_label, chain_list[i]])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj