def _read_mol2(self, mol2_file):
    """Read and parse a mol2 file for coordinates.

    Only the ``@<TRIPOS>MOLECULE`` and ``@<TRIPOS>ATOM`` records are used;
    lines belonging to any other section are skipped.  Every MOLECULE record
    starts a new _StructMolecule, every change of the residue index in the
    ATOM section starts a new _StructResidue.  Parsed objects are appended
    to self.molecules, self.residues and self.atoms.

    Non-sequential atom or residue indexes are reported through
    raise_or_log(), i.e. they raise QStructError unless self.ignore_errors
    is set, in which case they are only logged.

    Args:
        mol2_file (string):  name/path of file
    """
    molecule = None
    residue = None
    aindex, old_aindex = None, None
    section = None

    # the context manager closes the file even if parsing fails
    with open(mol2_file, 'r') as mol2:
        for line in mol2:
            if line.startswith("@<TRIPOS>"):
                section = line.replace("@<TRIPOS>", "").strip()
                if section == "MOLECULE":
                    if molecule is not None:
                        self.molecules.append(molecule)
                    molecule = _StructMolecule(self)
                # a section header never carries atom data
                continue

            if section != "ATOM":
                continue

            lf = line.split()
            if not lf:
                # blank line inside the ATOM section, nothing to parse
                continue

            if aindex is not None:
                old_aindex = aindex

            aindex, aname = int(lf[0]), lf[1]
            x, y, z = map(float, lf[2:5])
            rindex = int(lf[6])
            rname = lf[7][0:4].upper()

            if old_aindex is not None and aindex - old_aindex != 1:
                raise_or_log("Bad Mol2 format - atom "
                             "index {} followed by {}".format(old_aindex,
                                                              aindex),
                             QStructError, logger, self.ignore_errors)

            if not residue or residue.index_struct != rindex:
                if residue and rindex - residue.index_struct != 1:
                    raise_or_log("Bad Mol2 format - residue "
                                 "index {} followed by {}".format(
                                     residue.index_struct, rindex),
                                 QStructError, logger, self.ignore_errors)
                residue = _StructResidue(rindex, rname, molecule, self)
                self.residues.append(residue)
                molecule.add_residue(residue)

            atom = _StructAtom(aindex, aname, x, y, z, residue, self)
            self.atoms.append(atom)
            residue.add_atom(atom)

    # append last one after parsing
    if molecule is not None and len(molecule.residues) > 0:
        self.molecules.append(molecule)
def check(self):
    """Check the input for missing or inconsistent parameters.

    Looks at the 'md', 'files' and 'intervals' sections (and the presence
    of the 'lambdas' section) of self.parameters.  Every problem found is
    passed to raise_or_log(): QDynInputError is raised unless
    'ignore_errors' is set, in which case a warning is logged instead.

    The check is not all-knowing, please don't rely too much on it.
    """
    all_prms = self.parameters
    md_prms = all_prms.get("md", [])
    file_prms = all_prms.get("files", [])
    interval_prms = all_prms.get("intervals", [])

    def _report(message):
        # single exit point for all complaints
        raise_or_log(message, QDynInputError, logger, self._ignore_errors)

    # mandatory md parameters
    for required in ("temperature", "steps", "stepsize"):
        if required not in md_prms:
            _report("Missing parameter '{}'".format(required))

    # fep file and lambdas require each other
    has_fep = "fep" in file_prms
    has_lambdas = "lambdas" in all_prms
    if has_fep != has_lambdas:
        _report("Parameter 'fep' requires the 'lambdas' section "
                "and vice versa")

    # when generating new velocities, both parms need to be present
    has_init_temp = "initial_temperature" in md_prms
    has_seed = "random_seed" in md_prms
    if has_init_temp != has_seed:
        _report("Parameter 'initial_temperature' requires "
                "'random_seed' and vice versa")

    # if a restart file is not defined, we have to generate new velocities
    if not ("restart" in file_prms or has_init_temp):
        _report("No restart file, please set 'initial_temperature' "
                "and 'random_seed' to generate velocities")

    # since energies are important let's not rely on default values in Q:
    # an energy file and an energy interval have to come together
    if ("energy" in file_prms) != ("energy" in interval_prms):
        _report("'energy' must be defined in both 'intervals' "
                "and 'files' sections")
def _add_residue(self, residue):
    """Store a residue in self.residue_dict under its name.

    If an entry with the same name already exists, the string
    representations (get_str()) of both are compared: identical entries
    are only noted in the log, differing ones are reported through
    raise_or_log() (QLibError unless self.ignore_errors is set).  In
    either case the new residue ends up in the dictionary.
    """
    resname = residue.name
    known = self.residue_dict

    if resname in known:
        # duplicate name - is it really the same residue?
        if known[resname].get_str() == residue.get_str():
            logger.info("Duplicate library entry for residue '{}'".format(
                resname))
        else:
            raise_or_log("Duplicate library entries for residue '{}' "
                         "with different parameters".format(resname),
                         QLibError, logger, self.ignore_errors)

    known[resname] = residue
def _add_prm(self, prm):
    """Add the parameter 'prm' to the appropriate dictionary.

    The dictionary is chosen by the type of 'prm' (and, for torsions and
    impropers, by its 'is_generic' flag).  If a parameter with the same
    prm_id is already stored, the string representations are compared:

      - identical:  a warning about the duplicate is logged
      - different:  raise_or_log() is called, which depending on the value
                    of QPrm.ignore_errors either raises QPrmError (False)
                    or lets the new parameter overwrite the old (True)

    Returns the overwritten parameter if one with a different value was
    replaced, otherwise None.  Raises TypeError for unsupported types.
    """
    if isinstance(prm, _PrmAtom):
        store = self.atom_types
    elif isinstance(prm, _PrmBond):
        store = self.bonds
    elif isinstance(prm, _PrmAngle):
        store = self.angles
    elif isinstance(prm, _PrmTorsion):
        store = self.generic_torsions if prm.is_generic else self.torsions
    elif isinstance(prm, _PrmImproper):
        store = self.generic_impropers if prm.is_generic \
            else self.impropers
    else:
        raise TypeError("'prm' is not of type "
                        "_PrmAtom/Bond/Angle/Torsion/Improper")

    replaced = None
    if prm.prm_id in store:
        new_str = str(prm)
        stored_str = str(store[prm.prm_id])
        if new_str == stored_str:
            logger.warning("Removing duplicate parameter "
                           ":\n{}".format(new_str))
        else:
            raise_or_log("Same parameter types with different "
                         "value (could also be due to non-sequential "
                         "torsion parameters):\n{}\n{}".format(new_str,
                                                               stored_str),
                         QPrmError, logger, self.ignore_errors)
            # only reached when errors are ignored
            replaced = store[prm.prm_id]

    store[prm.prm_id] = prm
    return replaced
def _read_pdb(self, pdb_file):
    """Read and parse a PDB file for coordinates.

    Only ATOM/HETATM and TER/GAP records are looked at.  Atom records are
    read by fixed columns (serial 7-12, atom name 13-17, residue name
    18-20, residue index 23-26, x/y/z 31-54).  A TER or GAP record closes
    the current _StructMolecule and starts a new one.  Parsed objects are
    appended to self.molecules, self.residues and self.atoms.

    Non-sequential atom/residue indexes and duplicate atom names within a
    residue are reported through raise_or_log(), i.e. they raise
    QStructError unless self.ignore_errors is set.

    Args:
        pdb_file (string):  name/path of file
    """
    # make a new _StructMolecule object
    molecule = _StructMolecule(self)
    residue = None
    aindex, old_aindex = None, None

    # the context manager closes the file even if parsing fails
    with open(pdb_file, 'r') as pdb:
        for line in pdb:
            if line.startswith(("ATOM", "HETATM")):
                if aindex is not None:
                    old_aindex = aindex

                aindex = int(line[6:12])
                if old_aindex is not None and aindex - old_aindex != 1:
                    raise_or_log("Bad PDB format - atom "
                                 "index {} followed by {}"
                                 .format(old_aindex, aindex),
                                 QStructError, logger, self.ignore_errors)

                aname = line[12:17].strip()
                rname = line[17:20].strip().upper()
                rindex = int(line[22:26])
                x, y, z = map(float,
                              (line[30:38], line[38:46], line[46:54]))

                if not residue or residue.index_struct != rindex:
                    if residue and rindex - residue.index_struct != 1:
                        raise_or_log("Bad PDB format - residue "
                                     "index {} followed by {}"
                                     .format(residue.index_struct, rindex),
                                     QStructError, logger,
                                     self.ignore_errors)
                    residue = _StructResidue(rindex, rname, molecule, self)
                    self.residues.append(residue)
                    molecule.add_residue(residue)

                if any(a.name == aname for a in residue.atoms):
                    raise_or_log("Bad PDB format - two atoms with same name "
                                 "({}) in residue {}.{}"
                                 "".format(aname, rname, rindex),
                                 QStructError, logger, self.ignore_errors)

                atom = _StructAtom(aindex, aname, x, y, z, residue, self)
                self.atoms.append(atom)
                residue.add_atom(atom)

            elif line.startswith(("TER", "GAP")):
                self.molecules.append(molecule)
                # the next atom record starts a new residue and molecule
                residue = None
                molecule = _StructMolecule(self)

    # append the last one if it didn't end with a TER/GAP
    if molecule is not None and len(molecule.residues) > 0:
        self.molecules.append(molecule)
def _check_parms(self, parms):
    """Check if parameters are supported and of the correct type.

    'parms' maps section names to their content.  Every section name has
    to be a key of Q_PARAMETERS.  For sections given as dictionaries,
    every keyword has to be a key of Q_PARAMETERS[section], and the value
    has to be accepted by the callable stored there (the expected type).

    Problems (typos and such) are passed to raise_or_log(): QDynInputError
    is raised unless 'ignore_errors' is set, in which case the problem is
    logged and the check continues with the next keyword/section.
    """
    for qsection, qsec_parms in six.iteritems(parms):
        if qsection not in Q_PARAMETERS:
            raise_or_log("Unsupported section: '{}'".format(qsection),
                         QDynInputError, logger, self._ignore_errors)
            # no keyword table to check against; looking the keywords up
            # anyway would only report the same problem a second time as
            # a misleading "Unknown keyword"
            continue

        if not isinstance(qsec_parms, dict):
            continue

        for key, value in six.iteritems(qsec_parms):
            # each keyword is checked on its own, so that one bad keyword
            # does not hide problems with the ones that follow it
            try:
                exp_type = Q_PARAMETERS[qsection][key]
            except KeyError:
                raise_or_log("Unknown keyword '{}' in section '{}'"
                             "".format(key, qsection),
                             QDynInputError, logger, self._ignore_errors)
                continue

            try:
                exp_type(value)
            except (ValueError, TypeError):
                # TypeError: value of a kind the converter can't handle
                # at all (e.g. None or a list where a number is expected)
                raise_or_log("Bad value '{}' for parameter '{}' in section "
                             "'{}'".format(value, key, qsection),
                             QDynInputError, logger, self._ignore_errors)
def make_fep(qmap_file, pdb_file, forcefield,
             parm_files, lib_files, ignore_errors=False):
    """Generate a template FEP file for EVB simulations in Q.

    Parses a QMAP file (see below), state 1 structure file (PDB) and
    all states libraries and parameters, and determines the changes
    in connectivity/charges/parameters that occur between the states.

    QMAP is a text file that defines mappings of library ids (for each state)
    to state 1 structure/topology ids, best explained on an example:

    q   315.O     OHH.O     OHH.O
    q   315.H1    OHH.H1    HIP.HE2
    q   315.H2    OHH.H2    OHH.H2
    q   155.NE2   HID.NE2   HIP.NE2
    ...
    n   155.CA    HID.CA    HIP.CA

    The first column defines the atom as being a 'Q' atom or a 'neighbouring'
    atom. The latter will not be included in the 'Q-region' but will be
    included in the 'change_bonds/change_angles...' sections in case there is a
    change in bonding/parameters outside the Q region. Additionally, you can
    define a 'q' atom with 'q_qcp', which will create an additional section for
    isotopically clean masses used in QCP calculations.
    The second column is the PDB ID, comprised of residue index and atom name,
    separated by a dot.
    The third column is the library ID of this atom in state 1, comprised of
    residue name and atom name (should be the same as in the structure).
    The fourth column is the library ID of this atom in state 2.
    Additional columns can be added for other states.
    Anything after a '#', '*' or '!' character is treated as a comment.

    The returned template string contains several missing parts, denoted with
    <FIX>, which have to be manually replaced with appropriate values. These
    include the softpair C parameters, Morse parameters, Hij parameters.

    For every state other than the first, a dummy PDB file is written to a
    temporary file (tempfile.mkstemp) in order to build the topology of that
    state. These files are not removed.

    Args:
        qmap_file (string): QMAP file path
        pdb_file (string): state 1 PDB file path (the one built with qprep)
        forcefield (string): forcefield type (see SUPPORTED_FF)
        parm_files (list): Q parameter-file paths
        lib_files (list): Q library-file paths
        ignore_errors (boolean, optional): don't fail on certain non critical
                                           errors

    Returns:
        fepstr (string):  fepfile template

    Raises:
        QMakeFepError

    """

    if forcefield not in SUPPORTED_FF:
        raise QMakeFepError("Force field '{}' not supported. Use {}"
                            "".format(forcefield, " or ".join(SUPPORTED_FF)))

    fep_types = {"atoms": [], "bonds": [], "angles": [],
                 "torsions": [], "impropers": []}
    fep_changes = {"atoms": [], "charges": [],
                   "bonds": ODict(), "angles": ODict(),
                   "torsions": ODict(), "impropers": ODict()}
    fep_qcp_atoms = []
    fep_morse_prms = {}
    fep_reacting_atoms = set()
    num_evb_states = None

    # parse the MAP file
    # pdb_ids_map = [ ('q', [pdbid1_state1,]),
    #                 ('q', [pdbid2_state1,]),
    #                 ...
    #                 ('n', [pdbid11_state1,]),
    #                 ...
    #               ]
    # lib_ids_map = [ [lib_id1_state1, lib_id2_state1...],
    #                 [lib_id1_state2, lib_id2_state2...],
    #                 ...
    #               ]
    #
    lib_ids_map = []
    pdb_ids_map = []
    with open(qmap_file, 'r') as qatom_map:
        for line in qatom_map:
            line = re.split(r"[#*!]", line, 1)[0].strip()  # remove comments
            if line == "":
                continue
            c = line.split()
            if len(c) < 3:
                # type, PDB ID and at least one library ID are required;
                # fewer columns can't define even a single state
                raise QMakeFepError("Lines in the QMAP file should have at "
                                    "least three columns (type, PDB ID, "
                                    "library ID): '{}'".format(line))
            atom_type = c[0].lower()
            pdb_id = c[1]
            lib_ids = c[2:]

            if atom_type not in ["q", "n", "q_qcp"]:
                raise QMakeFepError("Lines in the QMAP file should begin "
                                    "with either 'q' (qatom) or 'n' "
                                    "(neighboring atom) or 'q_qcp' "
                                    "(QPI q atom)")
            try:
                resid, name = pdb_id.split(".")
                if not name or not int(resid):
                    raise ValueError
            except ValueError:
                raise QMakeFepError("Invalid PDB ID '{}'. Should be "
                                    "RESID.ATOMNAME".format(pdb_id))

            tmp = (atom_type, [pdb_id, ])
            if tmp in pdb_ids_map:
                raise QMakeFepError("Duplicate PDB ID: '{}'".format(pdb_id))
            pdb_ids_map.append(tmp)

            if num_evb_states is None:
                num_evb_states = len(lib_ids)
            elif len(lib_ids) != num_evb_states:
                raise QMakeFepError("Number of states in line '{}' not equal "
                                    "to number of PDB files".format(line))

            for state, lib_id in enumerate(lib_ids):
                try:
                    resname, name = lib_id.split(".")
                    if not resname or not name:
                        raise ValueError
                except ValueError:
                    raise QMakeFepError("Invalid library ID '{}'. Should be "
                                        "RESNAME.ATOMNAME".format(lib_id))
                # first time this state is seen, make room for it
                if state >= len(lib_ids_map):
                    lib_ids_map.append([])
                if lib_id in lib_ids_map[state]:
                    raise QMakeFepError("The library IDs in one EVB state "
                                        "should be unique (double '{}'), "
                                        "otherwise proper bonding can't "
                                        "be determined.".format(lib_id))
                lib_ids_map[state].append(lib_id)

    if num_evb_states is None:
        # nothing but comments and blank lines; everything below would
        # fail on range(None)
        raise QMakeFepError("No atoms defined in the QMAP file '{}'"
                            "".format(qmap_file))

    # load libraries
    qlib = QLib(forcefield, ignore_errors=ignore_errors)
    for lib in lib_files:
        try:
            qlib.read_lib(lib)
        except QLibError as e:
            raise QMakeFepError("Problem parsing lib ({}): {}"
                                "".format(lib, e))

    # make dummy structures for other states
    structures = [None for _ in range(num_evb_states)]
    structures[0] = pdb_file
    libid_pdbid_map = [{} for _ in range(num_evb_states)]

    for state in range(1, num_evb_states):
        state_structure = []
        atom_index = 1
        processed_residues = []
        for i, (q_or_n, pdb_ids_all_states) in enumerate(pdb_ids_map):

            lib_id = lib_ids_map[state][i]
            resname, aname = lib_id.split(".")

            # add all atoms of current residue to the dummy structure
            # at the same time, storing the mapping lib_id:pdb_id
            # in libid_pdbid_map for later
            if resname not in processed_residues:
                try:
                    residue_lib = qlib.residue_dict[resname]
                except KeyError:
                    raise QMakeFepError("Residue '{}' not found in library."
                                        "".format(resname))
                processed_residues.append(resname)
                res_index = len(processed_residues)

                for atom in residue_lib.atoms:
                    lib_id2 = "{}.{}".format(resname, atom.name)
                    pdb_id2 = "{}.{}".format(res_index, atom.name)
                    # fixed-column PDB record; the padding after the
                    # residue index puts x,y,z into columns 31-54
                    state_structure.append("{:<6}{:5} {:4} {:3} {:5}    "
                                           "{:8.3f}{:8.3f}{:8.3f}"
                                           "".format("ATOM", atom_index,
                                                     atom.name, resname,
                                                     res_index, 0, 0, 0))
                    atom_index += 1

                    # map the newly created dummy atom's pdb_id to lib_id
                    libid_pdbid_map[state][lib_id2] = pdb_id2

            # add pdb_id of current atom in current (dummy structure)
            # state to pdb_ids_map (using its lib_id)
            try:
                pdb_id_this_state = libid_pdbid_map[state][lib_id]
            except KeyError:
                raise QMakeFepError("Library ID '{}' not valid."
                                    "".format(lib_id))
            pdb_ids_all_states.append(pdb_id_this_state)

        # mkstemp() returns an already open descriptor; write through it
        # so that neither the descriptor nor a second handle is leaked
        fd, structures[state] = tempfile.mkstemp()
        with os.fdopen(fd, "w") as dummy_pdb:
            dummy_pdb.write("\n".join(state_structure))

    # load parameters
    qprm = QPrm(forcefield, ignore_errors=ignore_errors)
    for parm in parm_files:
        try:
            qprm.read_prm(parm)
        except QPrmError as e:
            raise QMakeFepError("Problem with parm ({}): {}"
                                "".format(parm, e))

    # load structures and make topologies
    topologies = []
    for state in range(num_evb_states):
        try:
            qstruct = QStruct(structures[state], "pdb",
                              ignore_errors=ignore_errors)
        except QStructError as e:
            raise QMakeFepError("Problem parsing PDB file ({}): {} "
                                "".format(structures[state], e))

        try:
            topologies.append(QTopology(qlib, qprm, qstruct))
        except QTopologyError as e:
            raise QMakeFepError("Problem building the topology: {}"
                                "".format(e))

    # Make _TopoAtom (atoms in QTopology) maps out of qmap's lists
    # and extract types, type changes and charge changes
    #
    # atom_map = [ [_TopoAtom1_state1, _TopoAtom1_state2, ... ],
    #              [_TopoAtom2_state1, _TopoAtom2_state2, ... ],
    #              [_TopoAtom3_state1, _TopoAtom3_state2, ... ],
    #              ...
    #            ]
    atom_map = []
    for i, (q_or_n, pdb_id_all_states) in enumerate(pdb_ids_map):
        atom_all_states = []
        for state, pdb_id in enumerate(pdb_id_all_states):
            residue, aname = pdb_id.split(".")
            try:
                residue = topologies[state].residues[int(residue) - 1]
                atom = [a for a in residue.atoms if a.name == aname][0]
            except (KeyError, IndexError):
                raise QMakeFepError("Atom '{}' doesn't exist in PDB '{}'"
                                    "".format(pdb_id, structures[state]))
            atom_all_states.append(atom)
        atom_map.append(atom_all_states)

        # check for stupidity - lib_id in QMAP state 1
        # not matching the structure/topology
        lib_id_qmap = lib_ids_map[0][i]
        lib_id = "{}.{}".format(atom_all_states[0].residue.name,
                                atom_all_states[0].name)
        if lib_id != lib_id_qmap:
            pdb_id = pdb_ids_map[i][1][0]
            raise QMakeFepError("QMAP state 1 library ID ({}) of atom '{}' "
                                "doesn't match topology library ID ({})."
                                "".format(lib_id_qmap, pdb_id, lib_id))

        # For Q atoms (and not the neighbor atoms):
        # get FEP atom types, type changes and charge changes
        if q_or_n in ["q", "q_qcp"]:
            for atom in atom_all_states:
                if atom.prm not in fep_types["atoms"]:
                    fep_types["atoms"].append(atom.prm)

            fep_changes["charges"].append([a.charge for a in atom_all_states])
            fep_changes["atoms"].append(atom_all_states)
            if q_or_n == "q_qcp":
                fep_qcp_atoms.append(atom_all_states)

    charge_sums = []
    for state in range(num_evb_states):
        charge_sum = sum([c[state] for c in fep_changes["charges"]])
        if abs(round(charge_sum) - charge_sum) > 1e-6:
            raise_or_log("Net charge in state {} not integer: {}"
                         "".format(state + 1, charge_sum),
                         QMakeFepError, logger, ignore_errors=ignore_errors)
        charge_sums.append(charge_sum)

    if any([abs(c - charge_sums[0]) > 1e-6 for c in charge_sums]):
        logger.warning("Net charge changes between states: {}"
                       "".format(" -> ".join([str(c) for c in charge_sums])))

    # get all Bonds, Angles, Torsions and Impropers which include
    # at least one atom defined in qmap
    batis = {"bonds": [set() for _ in range(num_evb_states)],
             "angles": [set() for _ in range(num_evb_states)],
             "torsions": [set() for _ in range(num_evb_states)],
             "impropers": [set() for _ in range(num_evb_states)]}
    for atom_all_states in atom_map:
        for state, atom in enumerate(atom_all_states):
            batis["bonds"][state].update(atom.bonds)
            batis["angles"][state].update(atom.angles)
            batis["torsions"][state].update(atom.torsions)
            batis["impropers"][state].update(atom.impropers)

    # map the bonds,angles,torsions,impropers (bati) in different states
    # to same key (ordered list of state1 PDB_IDs)
    #
    # bati_map =
    # { "bonds": {state1_bond1_key: [bond1_state1, bond1_state2,...],
    #             state1_bond2_key: [bond2_state1, bond2_state2,...], ...},
    #   "angles": {state1_angle1_key: [angle1_state1, angle1_state2,...],...}
    #  ... }
    #
    # also, include only batis which have all atoms defined in qmap
    # also, detect inter-residue batis and raise QMakeFepError
    bati_map = {"bonds": {}, "angles": {}, "torsions": {}, "impropers": {}}
    for state in range(num_evb_states):
        atoms_in_state = [a_all_st[state] for a_all_st in atom_map]
        for bati_type in bati_map:
            for bati in batis[bati_type][state]:

                # find the corresponding atoms in state1
                try:
                    atoms_st1 = [atom_map[atoms_in_state.index(a)][0]
                                 for a in bati.atoms]
                except ValueError:
                    # one of the Atoms is not defined in QMAP
                    continue

                pdbid_index = []
                for atom in atoms_st1:
                    pdbid_index.append((atom.index,
                                        "{}.{}".format(atom.residue.index,
                                                       atom.name)))
                # order the pdbids to prevent double entries
                if bati_type == "bonds":
                    pids = sorted(pdbid_index)
                elif bati_type == "angles":
                    pids = min(pdbid_index, list(reversed(pdbid_index)))
                elif bati_type == "torsions":
                    pids = min(pdbid_index, list(reversed(pdbid_index)))
                elif bati_type == "impropers":
                    # topology order == library order == correct order
                    pids = pdbid_index
                key = " ".join([p[1] for p in pids])

                # check for bonds/angles/torsions/impropers that are
                # shared between residues
                residue_ids = set(atom.residue.index for atom in bati.atoms)
                if len(residue_ids) > 1:
                    raise QMakeFepError("Inter-residue bond/angle/torsion '{}'"
                                        " not supported. Combine the residues "
                                        "into a single library entry if you "
                                        "want to make changes over the "
                                        "'head-tail' bond.".format(key))

                # add bati to bati_map
                try:
                    bati_map[bati_type][key][state] = bati
                except KeyError:
                    bati_map[bati_type][key] = [None for _ in
                                                range(num_evb_states)]
                    bati_map[bati_type][key][state] = bati

    def _bati_sort(key, bati_all_states):
        # to sort bonds/angles.. based on the key
        # also, breaking and forming bonds have priority
        try:
            return (-1 * bati_all_states.index(None), key)
        except ValueError:
            # present in all states
            return (1, key)

    # find changes between states (add to fep_changes dict)
    for bati_type, batis_of_type in bati_map.items():
        for bati_key, bati_all_states in sorted(
                batis_of_type.items(),
                key=lambda key_val: _bati_sort(key_val[0], key_val[1])):

            # bond/angle/.. breaking or forming
            if None in bati_all_states:
                fep_changes[bati_type][bati_key] = bati_all_states

                # add bond atoms to "reactive atoms" set
                # and replace the bond parameter with a Morse type
                if bati_type == "bonds":
                    for bati in bati_all_states:
                        if bati is not None:
                            fep_reacting_atoms |= set(bati.atoms)
                            # the bond parameter is replaced with a Morse
                            # parameter (_FepPrmMorse)
                            prm_id = bati.prm.prm_id
                            try:
                                bati.prm = fep_morse_prms[prm_id]
                            except KeyError:
                                bati.prm = _FepPrmMorse(bati.prm)
                                fep_morse_prms[prm_id] = bati.prm

            # the actual values of the parameters are not exactly the same
            else:
                tmp = [bati_all_states[0].prm.strval == bati.prm.strval
                       for bati in bati_all_states]
                if not all(tmp):
                    fep_changes[bati_type][bati_key] = bati_all_states

    # add parameters of changing batis to fep_types
    for bati_type in bati_map:
        for bati_all_states in fep_changes[bati_type].values():
            prms = [bati.prm for bati in bati_all_states
                    if bati is not None]
            for prm in prms:
                if prm not in fep_types[bati_type]:
                    fep_types[bati_type].append(prm)

    # add reactive atoms from states that have bond==None to fep_reacting_atoms
    for atom_all_states in fep_changes["atoms"]:
        for atom in atom_all_states:
            if atom in fep_reacting_atoms:
                fep_reacting_atoms |= set(atom_all_states)

    ########################
    # Prepare the output
    ########################

    fep_l = {"atoms": [],
             "atom_types": [],
             "qcp_mass": [],
             "change_atoms": [],
             "change_charges": [],
             "soft_pairs": [],
             "off_diagonals": [],
             "bond_types": [],
             "change_bonds": [],
             "angle_types": [],
             "change_angles": [],
             "torsion_types": [],
             "change_torsions": [],
             "improper_types": [],
             "change_impropers": []}

    ####################
    # ATOMS
    # CHANGE_ATOMS
    # CHANGE_CHARGES
    ####################
    format_atoms = "{:<15} {:<10} # {:<15} {:<15} {:>3}"
    format_ch_atoms = "{:<10} " + " {:<12}" * num_evb_states + " # {:<}"
    format_ch_crgs = "{:<10} " + " {:12}" * num_evb_states + " # {:<10}" \
                     + " {:>12}" * (num_evb_states - 1)
    format_qcp = "{:<15} {:<10} # {:<10}"

    fep_l["atoms"].append(format_atoms.format("#Q index", "PDB index",
                                              "St.1 PDB_ID", "St.1 LIB_ID",
                                              ""))

    tmp = ["#Q index"]
    tmp.extend(["Type st.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_ID")
    fep_l["change_atoms"].append(format_ch_atoms.format(*tmp))

    tmp = ["#Q index"]
    tmp.extend(["Charge st.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_ID")
    tmp.extend(["dq({}->{})".format(n + 1, n + 2)
                for n in range(num_evb_states - 1)])
    fep_l["change_charges"].append(format_ch_crgs.format(*tmp))

    if fep_qcp_atoms:
        fep_l["qcp_mass"].append("[qcp_mass]")
        fep_l["qcp_mass"].append(format_qcp.format("#Q index", "Mass",
                                                   "St.1 PDB_ID"))

    for i, atom_all_states in enumerate(fep_changes["atoms"]):
        q_index = i + 1
        atom_st1 = atom_all_states[0]
        pdb_id = "{}.{}".format(atom_st1.residue.index, atom_st1.name)
        lib_id = "{}.{}".format(atom_st1.residue.name, atom_st1.name)

        # atoms (flagged with '!' if reacting in any of the states)
        if any(atom in fep_reacting_atoms for atom in atom_all_states):
            reacting_flag = " !"
        else:
            reacting_flag = ""
        fep_l["atoms"].append(format_atoms.format(q_index,
                                                  "$" + pdb_id + "$",
                                                  pdb_id, lib_id,
                                                  reacting_flag))

        # change_atoms
        tmp = [q_index] + [a.prm.prm_id for a in atom_all_states] + [pdb_id]
        fep_l["change_atoms"].append(format_ch_atoms.format(*tmp))

        # charges
        crgs = [float(a.charge) for a in atom_all_states]
        tmp = [q_index] + crgs + [pdb_id] \
            + [crgs[n + 1] - crgs[n] for n in range(num_evb_states - 1)]
        fep_l["change_charges"].append(format_ch_crgs.format(*tmp))

        # qcp_atoms
        if atom_all_states in fep_qcp_atoms:
            fep_l["qcp_mass"].append(format_qcp.format(q_index, "<FIX>",
                                                       pdb_id))

    ###############
    # ATOM_TYPES
    ###############
    format_atypes = "{:<12} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10}"
    if forcefield == "amber":
        fep_l["atom_types"].append(format_atypes.format("#Atom_type",
                                                        "LJ_Rm", "LJ_eps",
                                                        "SP_Ci", "SP_ai",
                                                        "LJ_Rm", "LJ_eps_14",
                                                        "mass"))
    else:
        fep_l["atom_types"].append(format_atypes.format("#Atom_type",
                                                        "LJ_A", "LJ_B",
                                                        "SP_Ci", "SP_ai",
                                                        "LJ_A_14", "LJ_B_14",
                                                        "mass"))

    fep_reacting_atoms_prms = [a.prm for a in fep_reacting_atoms]
    for prm in fep_types["atoms"]:
        sp_c = 1
        sp_a = 2.5
        if prm in fep_reacting_atoms_prms:
            sp_c = "<FIX>"
        if forcefield == "amber":
            lj1, lj2 = prm.lj_R, prm.lj_eps
            lj3, lj4 = lj1, round(lj2 / 1.2, 4)
        else:
            lj1, lj2 = prm.lj_A, prm.lj_B
            lj3, lj4 = round(lj1 / (2**0.5), 4), round(lj2 / (2**0.5), 4)
        fep_l["atom_types"].append(format_atypes.format(prm.prm_id, lj1, lj2,
                                                        sp_c, sp_a, lj3, lj4,
                                                        prm.mass))

    ###############
    # BOND_TYPES
    ###############
    format_hbonds = "{:<8} {:>10} {:>10} # {}"
    format_mbonds = "{:<8} {:^10} {:^10} {:>10} # {}"
    fep_l["bond_types"].append("## Harmonic format")
    fep_l["bond_types"].append(format_hbonds.format("#Index", "Fc",
                                                    "r0", "PRM_ID"))
    fep_l["bond_types"].append("## Morse format")
    fep_l["bond_types"].append(format_mbonds.format("#Index", "D", "alpha",
                                                    "r0", "PRM_ID"))

    for i, bond_type in enumerate(fep_types["bonds"]):
        b_index = i + 1
        if isinstance(bond_type, _FepPrmMorse):
            prm_id = "-".join(bond_type.harmonic_prm.prm_id.split())
            tmp = format_mbonds.format(b_index, "<FIX_D>", "<FIX_a>",
                                       "<FIX_r0>", prm_id)
            fep_l["bond_types"].append(tmp)
        else:
            prm_id = "-".join(bond_type.prm_id.split())
            tmp = format_hbonds.format(b_index, bond_type.fc,
                                       bond_type.r0, prm_id)
            fep_l["bond_types"].append(tmp)

    ###############
    # CHANGE_BONDS
    ###############
    format_bondch = "{:<10} {:<10} " + "{:^5} " * num_evb_states + " # {}"
    tmp = ["#Atom1", "Atom2"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_bonds"].append(format_bondch.format(*tmp))

    for bond_key, bond_all_states in fep_changes["bonds"].items():
        # bond_key == "PDB_ID1 PDB_ID2"
        prm_indexes = []
        for b in bond_all_states:
            if b is None:
                prm_indexes.append(0)
            else:
                btype_index = fep_types["bonds"].index(b.prm) + 1
                prm_indexes.append(btype_index)

        placeholders = ["${}$".format(a) for a in bond_key.split()]
        pdb_id = "-".join(bond_key.split())

        tmp = placeholders + prm_indexes + [pdb_id]
        fep_l["change_bonds"].append(format_bondch.format(*tmp))

    ###############
    # ANGLE_TYPES
    ###############
    format_angles = "{:<8} {:>10} {:>10} # {}"
    fep_l["angle_types"].append(format_angles.format("#Index", "Fc",
                                                     "theta0", "PRM_ID"))
    for i, angle_type in enumerate(fep_types["angles"]):
        an_index = i + 1
        prm_id = "-".join(angle_type.prm_id.split())
        tmp = format_angles.format(an_index, angle_type.fc,
                                   angle_type.theta0, prm_id)
        fep_l["angle_types"].append(tmp)

    #################
    # CHANGE_ANGLES
    #################
    format_angch = "{:<10} {:<10} {:<10} " + "{:^5} " * num_evb_states \
                   + " # {}"
    tmp = ["#Atom1", "Atom2", "Atom3"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_angles"].append(format_angch.format(*tmp))

    for angle_key, angle_all_states in fep_changes["angles"].items():
        # angle_key == "PDB_ID1 PDB_ID2 PDB_ID3"
        prm_indexes = []
        for ang in angle_all_states:
            if ang is None:
                prm_indexes.append(0)
            else:
                atype_index = fep_types["angles"].index(ang.prm) + 1
                prm_indexes.append(atype_index)

        placeholders = ["${}$".format(a) for a in angle_key.split()]
        pdb_id = "-".join(angle_key.split())

        tmp = placeholders + prm_indexes + [pdb_id]
        fep_l["change_angles"].append(format_angch.format(*tmp))

    #################
    # TORSION_TYPES
    #################
    format_torsions = "{:<8} {:>10} {:>10} {:>10} # {}"
    fep_l["torsion_types"].append(format_torsions.format("#Index", "Fc",
                                                         "mult", "psi0",
                                                         "PRM_ID"))
    # a torsion parameter can consist of several terms, each of which gets
    # its own index; tor_indexes[n] lists the indexes of the n-th parameter
    tor_index = 1
    tor_indexes = []
    for torsion_type in fep_types["torsions"]:
        prm_id = "-".join(torsion_type.prm_id.split())
        prm_indexes = []
        for fc, per, psi0, npath in torsion_type.get_prms():
            fc = fc / npath
            tmp = format_torsions.format(tor_index, fc, per, psi0, prm_id)
            fep_l["torsion_types"].append(tmp)
            prm_indexes.append(tor_index)
            tor_index += 1
        tor_indexes.append(prm_indexes)

    ###################
    # CHANGE_TORSIONS
    ###################
    format_torch = "{:<10} {:<10} {:<10} {:<10} " \
                   + "{:^5} " * num_evb_states + " # {}"

    tmp = ["#Atom1", "Atom2", "Atom3", "Atom4"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_torsions"].append(format_torch.format(*tmp))

    for torsion_key, torsion_all_states in fep_changes["torsions"].items():
        # torsion_key == "PDB_ID1 PDB_ID2 PDB_ID3 PDB_ID4"
        for state, tor in enumerate(torsion_all_states):
            if tor is None:
                continue

            # one line per term, switched on only in this state
            for i in range(len(tor.prm.fcs)):
                tprm_index = fep_types["torsions"].index(tor.prm)
                ttype_index = tor_indexes[tprm_index][i]

                prm_indexes = [0 for _ in range(len(torsion_all_states))]
                prm_indexes[state] = ttype_index

                placeholders = ["${}$".format(t) for t in torsion_key.split()]
                pdb_id = "-".join(torsion_key.split())

                tmp = placeholders + prm_indexes + [pdb_id]
                fep_l["change_torsions"].append(format_torch.format(*tmp))

    #################
    # IMPROPER_TYPES
    #################
    format_impropers = "{:<8} {:>10} {:>10} # {}"
    fep_l["improper_types"].append(format_impropers.format("#Index", "Fc",
                                                           "phi0", "PRM_ID"))
    for i, improper_type in enumerate(fep_types["impropers"]):
        imp_index = i + 1
        prm_id = "-".join(improper_type.prm_id.split())
        tmp = format_impropers.format(imp_index, improper_type.fc,
                                      improper_type.phi0, prm_id)
        fep_l["improper_types"].append(tmp)

    ###################
    # CHANGE_IMPROPERS
    ###################
    format_impch = "{:<10} {:<10} {:<10} {:<10} " \
                   + "{:^5} " * num_evb_states + " # {}"

    tmp = ["#Atom1", "Atom2", "Atom3", "Atom4"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_impropers"].append(format_impch.format(*tmp))

    for improper_key, improper_all_states in \
            fep_changes["impropers"].items():
        # improper_key == "PDB_ID1 PDB_ID2 PDB_ID3 PDB_ID4"
        prm_indexes = []
        for imp in improper_all_states:
            if imp is None:
                prm_indexes.append(0)
            else:
                itype_index = fep_types["impropers"].index(imp.prm) + 1
                prm_indexes.append(itype_index)

        placeholders = ["${}$".format(i) for i in improper_key.split()]
        pdb_id = "-".join(improper_key.split())

        tmp = placeholders + prm_indexes + [pdb_id]
        fep_l["change_impropers"].append(format_impch.format(*tmp))

    ##############
    # SOFT_PAIRS
    ##############
    for bond_key, bond_all_states in fep_changes["bonds"].items():
        if None in bond_all_states:
            for state, bond in enumerate(bond_all_states):
                if bond is None:
                    continue
                atoms_in_state = [atom_all_states[state] for atom_all_states
                                  in fep_changes["atoms"]]
                a1_qindex = atoms_in_state.index(bond.atoms[0]) + 1
                a2_qindex = atoms_in_state.index(bond.atoms[1]) + 1
                fep_l["soft_pairs"].append("{:10} {:10}".format(a1_qindex,
                                                                a2_qindex))

    for k in fep_l:
        fep_l[k] = "\n".join(fep_l[k])

    fepstr = """\
# Generated with Qtools, version {version}
# Date: {date}
# CWD: {cwd}
# CMDline: {cmd}
#

[FEP]
states {states}

[atoms]
{atoms}

[atom_types]
{atom_types}

[change_atoms]
{change_atoms}

[change_charges]
{change_charges}

[soft_pairs]
{soft_pairs}

[off_diagonals]
# State_i State_j Atom1 Atom2 A_ij mu_ij
#
## Example1, Hij=H12=0 (not known in advance)
## 1 2 13 14 0 0
## Example2, Hij=H12=C*exp(-mu * r_13_14) (C=20.0, mu=0.45)
## 1 2 13 14 20.0 0.45
#
<FIX>

[bond_types]
{bond_types}

[change_bonds]
{change_bonds}

[angle_types]
{angle_types}

[change_angles]
{change_angles}

[torsion_types]
{torsion_types}

[change_torsions]
{change_torsions}

[improper_types]
{improper_types}

[change_impropers]
{change_impropers}

{qcp_mass}
""".format(states=num_evb_states, date=time.ctime(), cmd=" ".join(sys.argv),
           cwd=os.getcwd(), version=__version__, **fep_l)

    return fepstr
def read_lib(self, libfile):
    """Read and parse a Q library (.lib) file.

    Every residue entry found in the file becomes a _LibResidue object;
    the residues are passed to self._add_residue() once the whole file
    has been parsed.

    Args:
        libfile (string): name/path of Q lib file

    Raises:
        QLibError: if a data line appears outside of a {XXX} residue entry
            and [section], if a line in a known section cannot be parsed,
            or if a [charge_groups] section is found while self.ff_type
            is 'amber'.
            A file without any residues is reported through raise_or_log()
            (raises or logs, depending on self.ignore_errors).
    """
    def parse_error(lnumber, line, expected):
        # single place for the "couldn't be parsed" message so that the
        # wording (and spacing) is the same for every section
        return QLibError("Line #{} in LIB file '{}' couldn't be parsed "
                         "(should look like this '{}'):\n{}".format(
                             lnumber, libfile, expected, line))

    # '#', '*' and '!' start a comment that runs to the end of the line
    comment_start = re.compile(r"[#*!]")

    residues = []
    section = None
    with open(libfile, 'r') as lib:
        for lnumber, raw_line in enumerate(lib, 1):
            line = comment_start.split(raw_line, maxsplit=1)[0].strip()
            if not line:
                continue

            if line.startswith("{"):
                # {XXX} opens a new residue entry
                resname = line.split("}")[0].strip("{} ").upper()
                residues.append(_LibResidue(resname, self))
                continue

            if line.startswith("["):
                # [xxx] opens a new section of the current residue
                section = line.split("]")[0].strip("[] ").lower()
                continue

            if not residues or not section:
                raise QLibError(
                    "Line #{} in LIB file '{}' is not a "
                    "comment and is not inside a {{XXX}} "
                    "section and [XXX...X] subsection:\n{}".format(
                        lnumber, libfile, line))

            residue = residues[-1]
            fields = line.split()

            if section == "atoms":
                try:
                    # first column (atom index) is ignored
                    atom_name, atom_type, atom_charge = fields[1:4]
                    residue.atoms.append(
                        _LibAtom(atom_name, atom_type,
                                 float(atom_charge), residue))
                except ValueError:
                    raise parse_error(lnumber, line,
                                      "aindex aname atype charge ...")

            elif section == "bonds":
                try:
                    a1, a2 = fields
                except ValueError:
                    raise parse_error(lnumber, line, "atom1 atom2")
                residue.bonds.append((a1, a2))

            elif section == "impropers":
                try:
                    a1, a2, a3, a4 = fields
                except ValueError:
                    raise parse_error(lnumber, line,
                                      "atom1 atom2 atom3 atom4")
                # second atom on the line is passed as the center atom
                residue._add_improper(a2, [a1, a3, a4])

            elif section == "charge_groups":
                if self.ff_type == "amber":
                    raise QLibError("'Charge_groups' section is not "
                                    "compatible with 'amber' forcefield")
                residue.charge_groups.append(fields)

            elif section == "info":
                try:
                    key, value = fields
                except ValueError:
                    raise parse_error(lnumber, line, "keyword value")
                residue.info[key] = value

            elif section == "connections":
                residue.connections.append(" ".join(fields))

            elif section == "build_rules":
                residue.build_rules.append(" ".join(fields))

            else:
                logger.warning("Unsupported section in '{}': {}"
                               "".format(libfile, section))

    for residue in residues:
        self._add_residue(residue)

    if not residues:
        raise_or_log("No residues found", QLibError,
                     logger, self.ignore_errors)
def check_valid(self):
    """Check the residue for duplicates, undefined atoms, integer charges...

    The checks run in this order: duplicate atom names, charge groups (or
    the net charge of the whole residue when there are no charge groups),
    bonds, impropers, [info] keywords and build rules.

    Raises:
        QLibError: for duplicate atom names, atoms referenced in charge
            groups / bonds / impropers / build rules that are not defined
            in the residue, and malformed or unsupported build rules.
            Non-integer net charges and unsupported [info] keywords go
            through raise_or_log(), i.e. they raise QLibError unless
            self.library.ignore_errors is set, in which case they are
            only logged.
    """
    names = [a.name for a in self.atoms]
    charges = [a.charge for a in self.atoms]

    # check for duplicates (report the first atom that has a twin)
    occurrences = {}
    for name in names:
        occurrences[name] = occurrences.get(name, 0) + 1
    for name in names:
        if occurrences[name] > 1:
            raise QLibError("Duplicate atom name '{}' in residue '{}'"
                            .format(name, self.name))

    # names are unique from here on, so a plain dict lookup is safe
    charge_of = dict(zip(names, charges))

    if self.charge_groups:
        # check charge groups for integer charges (and bad atom names)
        for cg in self.charge_groups:
            net_charge = 0
            for aname in cg:
                if aname not in charge_of:
                    raise QLibError("Undefined atom {} in charge group"
                                    " '{}'".format(aname, " ".join(cg)))
                net_charge += charge_of[aname]
            if abs(net_charge - round(net_charge)) > 1e-7:
                raise_or_log("Net charge of charge group '{}'"
                             " in residue '{}' not integer: {}"
                             .format(" ".join(cg), self.name, net_charge),
                             QLibError, logger,
                             self.library.ignore_errors)
    else:
        # no charge groups: check the whole residue for integer charge
        net_charge = sum(charges)
        if abs(net_charge - round(net_charge)) > 1e-7:
            raise_or_log("Net charge of residue '{}' not integer: "
                         "{}".format(self.name, net_charge),
                         QLibError, logger, self.library.ignore_errors)

    # check bonds for bad atom names
    for bond_atoms in self.bonds:
        for aname in bond_atoms:
            if aname not in charge_of:
                raise QLibError("Undefined atom {} in bond '{}'"
                                .format(aname, " ".join(bond_atoms)))

    # check impropers for bad atom names; "-C" and "+N" are accepted
    # even though they are not atoms of this residue
    for imp_atoms in self.impropers:
        for aname in imp_atoms:
            if aname not in charge_of and aname not in ("-C", "+N"):
                raise QLibError("Undefined atom '{}' in improper '{}'"
                                .format(aname, " ".join(imp_atoms)))

    # check keywords in info section (case-insensitive)
    low_keys = set(x.lower() for x in self.INFO_KEYS)
    for keyword in self.info:
        if keyword.lower() not in low_keys:
            raise_or_log("Keyword '{}' in [info] section "
                         "not supported (residue '{}')."
                         "".format(keyword, self.name),
                         QLibError, logger, self.library.ignore_errors)

    # check build rules: "type atom1 atom2 atom3 atom4 value"
    for build_rule in self.build_rules:
        br = build_rule.split()
        if len(br) < 6:
            raise QLibError("Invalid build rule '{}'"
                            "".format(build_rule))
        typ, anames = br[0], br[1:5]
        if typ not in self.BUILD_RULES:
            raise QLibError("Build_rule '{}' not supported".format(typ))
        for aname in anames:
            if aname not in charge_of:
                raise QLibError("Undefined atom '{}' in build_rule '{}'"
                                "".format(aname, build_rule))
def read_ffld(self, ffld_file, qstruct):
    """Read and parse a Macromodel FFLD file for oplsaa parameters.

    Args:
        ffld_file (string): path/name of ffld file
        qstruct (qstructure.QStruct): object created with the same
                                      structure file as the ffld

    The second argument is a QStruct object created with the pdb or mol2
    file that was used to create the ffld file. It's needed to map atoms
    and residues to their names in the structure. The n-th atom line of
    the ffld file is matched with the n-th atom of the structure.

    Atom-types have the following format: residuename.ATOMNAME
    Eg. asp.CA

    Raises:
        QLibError: if the force field type is not 'oplsaa'.
            An element mismatch between ffld and structure, or a file
            without residues, is reported through raise_or_log()
            (raises or logs, depending on self.ignore_errors).
    """
    if self.ff_type != "oplsaa":
        raise QLibError("Function not supported with "
                        "force field '{}'".format(self.ff_type))

    # (prefix of the header line, section it starts)
    section_headers = (
        ("atom type vdw symbol", "ATOMS"),
        ("Stretch k", "BONDS"),
        ("Bending k", "ANGLES"),
        ("proper Torsion", "TORSIONS"),
        ("improper Torsion", "IMPROPERS"),
    )

    # keys are ffld atom names, values are tuples:
    # (StructAtom, LibResidue)
    lookup_aname = {}
    residues = []
    section = None

    # 'with' guarantees the file is closed, also when parsing fails
    with open(ffld_file, 'r') as ffld:
        for line in ffld:
            line = line.strip()
            if not line or "------" in line:
                continue

            new_section = next((sec for prefix, sec in section_headers
                                if line.startswith(prefix)), None)
            if new_section is not None:
                section = new_section
                continue

            if section == "ATOMS":
                #
                #  C1 135 C1 CT -0.0175 3.5000 0.0660 high C: alkanes
                #
                lf = line.split()
                # only the name and the charge are used for the library,
                # the other columns are unpacked to validate the line
                name, _type, _vdw, _symbol = lf[0:4]
                charge, _sigma, _epsilon = [float(v) for v in lf[4:7]]

                aindex_struct = len(lookup_aname)
                atom_struct = qstruct.atoms[aindex_struct]
                residue_struct = atom_struct.residue

                # check if element from ffld matches the one in the
                # structure (just the first letters)
                if name[0].lower() != atom_struct.name[0].lower():
                    raise_or_log(
                        "Atom element mismatch, possible wrong "
                        "order of atoms: '{}' (struct) '{}' (ffld)".format(
                            atom_struct.name, name),
                        QLibError, logger, self.ignore_errors)

                # create new library entry if first atom or different
                # residue than the previous atom
                if not residues or \
                        residue_struct != \
                        qstruct.atoms[aindex_struct - 1].residue:
                    residues.append(_LibResidue(residue_struct.name, self))
                residue = residues[-1]

                # append the atom to the residue
                atom_name = atom_struct.name
                residue_name = residue_struct.name.lower()
                atom_type = "{}.{}".format(residue_name, atom_name)
                residue.atoms.append(
                    _LibAtom(atom_name, atom_type, charge, residue))
                lookup_aname[name] = (atom_struct, residue)

            elif section == "BONDS":
                #
                #  C1 H2 340.00000 1.09000 high 140 0 CT -HC ==> CT -HC
                #
                lf = line.split()
                atom1_struct, residue1 = lookup_aname[lf[0]]
                atom2_struct, residue2 = lookup_aname[lf[1]]
                atom_name1 = atom1_struct.name
                atom_name2 = atom2_struct.name
                rindex1 = atom1_struct.residue.index
                rindex2 = atom2_struct.residue.index

                # a bond between two residues becomes a tail/head
                # connection, a bond inside a residue a regular bond
                if rindex1 < rindex2:
                    residue1.connections.append("tail " + atom_name1)
                    residue2.connections.append("head " + atom_name2)
                elif rindex1 > rindex2:
                    residue1.connections.append("head " + atom_name1)
                    residue2.connections.append("tail " + atom_name2)
                else:
                    residue1.bonds.append((atom_name1, atom_name2))

            elif section == "IMPROPERS":
                #
                #  C21 C22 C20 O19 2.200 high aromatic atom
                #
                lf = line.split()
                imp_atoms = [lookup_aname[aname][0] for aname in lf[0:4]]
                center_atom = imp_atoms.pop(2)   # third pos
                center_rindex = center_atom.residue.index

                # atoms of the next/previous residue get a +/- prefix
                other_names = []
                for imp_atom in imp_atoms:
                    other_name = imp_atom.name
                    if imp_atom.residue.index > center_rindex:
                        other_name = "+" + other_name
                    elif imp_atom.residue.index < center_rindex:
                        other_name = "-" + other_name
                    other_names.append(other_name)
                other_names.sort()

                # the improper belongs to the residue of the center atom
                center_residue = lookup_aname[lf[2]][1]
                center_residue._add_improper(center_atom.name, other_names)

    for residue in residues:
        # add default charge group (all atoms)
        atom_names = [a.name for a in residue.atoms]
        residue.charge_groups.append(atom_names)
        # add residue to library
        self._add_residue(residue)

    if not residues:
        raise_or_log("No residues found", QLibError,
                     logger, self.ignore_errors)
    elif len(residues) > 1:
        logger.warning("Possible connections and improper "
                       "definitions of first and last residue "
                       "can be missing in the output!")
def read_prepin_impropers(self, prepin_file):
    """Read and parse an Amber prepin (.prepi) file.

    NOTE: Extracts only improper definitions for residues already
          defined in QLib.residue_dict
          (usually obtained with read_amber_lib or read_mol2)

    Args:
        prepin_file (string): name/path of the prepin file

    Raises:
        QLibError: if the force field type is not 'amber'. An empty
            library is reported through raise_or_log() (raises or logs,
            depending on self.ignore_errors).

    Residues that have impropers in the file but are missing in the
    library are skipped with a warning.
    """
    if self.ff_type != "amber":
        raise QLibError("Function not supported with "
                        "force field '{}'".format(self.ff_type))

    if len(self.residue_dict) == 0:
        raise_or_log("Cannot add impropers to empty library",
                     QLibError, logger, self.ignore_errors)

    # get all the impropers from the file
    # (residue name -> list of normalized improper lines)
    section = None
    residue = None
    impropers = {}
    with open(prepin_file) as prepi:
        for line in prepi:
            lf = line.split()
            if not lf:
                # a blank line closes the current section
                section = None
                continue

            if len(lf) >= 2 and lf[1] == "INT":
                # start of a residue entry, first column is its name
                residue = lf[0].upper()
                impropers.setdefault(residue, [])
                continue

            if not residue:
                continue

            if "DONE" in lf:
                residue = None
                continue

            if lf[0] == "IMPROPER":
                section = "improper"
                continue

            if section == "improper":
                # example line (center atom is N)
                # -M CA N H
                impropers[residue].append(" ".join(lf))

    # add them to the residues in the library
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    res_not_in_lib = set()
    for resname, imps in impropers.items():
        if not imps:
            continue
        # keep the try minimal: only a missing residue should end up in
        # the "not found in library" warning
        try:
            lib_residue = self.residue_dict[resname]
        except KeyError:
            res_not_in_lib.add(resname)
            continue

        for imp in imps:
            # different naming convention for head and tail
            imp = imp.replace("-M", "-C").replace("+M", "+N")
            other_atoms = imp.split()
            # third one is center
            center_atom = other_atoms.pop(2)
            lib_residue._add_improper(center_atom, other_atoms)

    for resname in res_not_in_lib:
        logger.warning("Prepi: Residue '{}' NOT found in library. "
                       "Impropers will be ignored. Check that residues "
                       "have the same names as in the library "
                       "(.lib, .mol2).".format(resname))
def read_mol2(self, mol2_file):
    """Read and parse a mol2 file.

    Add the residues to QLib.residue_dict as _LibResidue objects
    (via self._add_residue, after the whole file has been parsed).

    Only the ATOM and BOND records are used. Bonds between atoms of
    different residues are stored as 'tail'/'head' connections, bonds
    inside a residue as regular bonds.

    Args:
        mol2_file (string): name/path of mol2 file

    Raises:
        QLibError: if the force field type is not 'amber'. A file without
            residues is reported through raise_or_log() (raises or logs,
            depending on self.ignore_errors).

    Non-consecutive atom or residue indexes are only logged as warnings.
    """
    if self.ff_type != "amber":
        raise QLibError("Function not supported with "
                        "force field '{}'".format(self.ff_type))

    aindex, old_aindex = None, None
    rindex, old_rindex = None, None
    residues = []
    section = None
    # atom index (string) -> (atom name, residue index, _LibResidue);
    # bound up front so files without a MOLECULE record still parse
    lookup_aindex = {}

    # 'with' guarantees the file is closed, also when parsing fails
    with open(mol2_file, 'r') as mol2:
        for line in mol2:
            if line.startswith("@<TRIPOS>"):
                section = line.replace("@<TRIPOS>", "").strip()
                if section == "MOLECULE":
                    # fresh lookup for the bonds section of this molecule
                    lookup_aindex = {}
                continue

            lf = line.split()
            if not lf:
                # blank lines carry no record
                continue

            if section == "ATOM":
                old_aindex, old_rindex = aindex, rindex

                aindex, aname = int(lf[0]), lf[1]
                atype = lf[5]
                rindex = int(lf[6])
                rname = lf[7][0:3].upper()
                charge = float(lf[8])

                if old_aindex is not None and aindex - old_aindex != 1:
                    logger.warning("Bad Mol2 format - atom "
                                   "index {} followed by {}".format(
                                       old_aindex, aindex))

                if old_rindex != rindex:
                    # first atom of a new residue
                    residues.append(_LibResidue(rname, self))
                    # 'is not None' so that a previous index of 0 is
                    # checked as well
                    if old_rindex is not None and rindex - old_rindex != 1:
                        logger.warning("Bad Mol2 format - residue "
                                       "index {} followed by {}".format(
                                           old_rindex, rindex))

                lib_residue = residues[-1]
                lib_residue.atoms.append(
                    _LibAtom(aname, atype, charge, lib_residue))
                lookup_aindex[lf[0]] = (aname, rindex, lib_residue)

            elif section == "BOND":
                aname1, rindex1, residue1 = lookup_aindex[lf[1]]
                aname2, rindex2, residue2 = lookup_aindex[lf[2]]

                if rindex1 < rindex2:
                    residue1.connections.append("tail " + aname1)
                    residue2.connections.append("head " + aname2)
                elif rindex1 > rindex2:
                    residue1.connections.append("head " + aname1)
                    residue2.connections.append("tail " + aname2)
                else:
                    residue1.bonds.append((aname1, aname2))

    for residue in residues:
        self._add_residue(residue)

    if not residues:
        raise_or_log("No residues found", QLibError,
                     logger, self.ignore_errors)
def read_amber_lib(self, libfile):
    """Read and parse an Amber library (.lib) file.

    Add new residues to QLib.residue_dict as _LibResidue objects
    (via self._add_residue, after the whole file has been parsed).

    Only the 'atoms', 'connectivity' and 'residueconnect' sections are
    used; lines of all other sections are skipped.

    Args:
        libfile (string): name/path of Amber lib file

    Raises:
        QLibError: if the force field type is not 'amber', if a section
            header or data line cannot be parsed, or if a connectivity /
            residueconnect line references an atom index that is not
            defined in the residue. A file without residues is reported
            through raise_or_log() (raises or logs, depending on
            self.ignore_errors).
    """
    if self.ff_type != "amber":
        raise QLibError("Function not supported with force field "
                        "'{}'".format(self.ff_type))

    def parse_error(lnumber, line):
        return QLibError("Line #{} in LIB file '{}' "
                         "couldn't be parsed:\n{}".format(
                             lnumber, libfile, line))

    def undefined_atoms(index1, index2, section, lnumber):
        return QLibError("Undefined atom(s) {} and/or {} "
                         "mentioned in the {} "
                         "section of '{}', ln.{}.".format(
                             index1, index2, section, libfile, lnumber))

    residues = []
    section = None
    with open(libfile) as lib:
        for lnumber, line in enumerate(lib, 1):
            line = line.strip()
            if not line or line.startswith("!!"):
                # blank line or comment
                continue

            if line.startswith("!"):
                # section header, "!xxx.RESNAME.xxx.SECTION ..."
                lf = line.split()[0].split(".")
                if len(lf) < 4:
                    # lf[3] (the section name) must exist
                    raise parse_error(lnumber, line)
                rname = lf[1].upper()
                if not residues or residues[-1].name != rname:
                    residues.append(_LibResidue(rname, self))
                section = lf[3]
                continue

            if not section or not residues:
                continue

            residue = residues[-1]

            if section == "atoms":
                lf = line.split()
                try:
                    name, atype = lf[0].strip('"'), lf[1].strip('"')
                    charge = float(lf[7])
                    atype = atype.replace("*", "star")
                    residue.atoms.append(
                        _LibAtom(name, atype, charge, residue))
                except (IndexError, ValueError):
                    # too few columns or a non-numeric charge
                    raise parse_error(lnumber, line)

            elif section == "connectivity":
                atomnames = [a.name for a in residue.atoms]
                try:
                    ai1, ai2 = [int(f) for f in line.split()[:2]]
                except ValueError:
                    raise parse_error(lnumber, line)
                # indexes are 1-based; without this check 0 or a negative
                # value would silently pick an atom from the end of the
                # list
                if not (1 <= ai1 <= len(atomnames)
                        and 1 <= ai2 <= len(atomnames)):
                    raise undefined_atoms(ai1, ai2, "connectivity",
                                          lnumber)
                residue.bonds.append((atomnames[ai1 - 1],
                                      atomnames[ai2 - 1]))

            elif section == "residueconnect":
                atomnames = [a.name for a in residue.atoms]
                try:
                    head_i, tail_i = [int(f) for f in line.split()[:2]]
                except ValueError:
                    raise parse_error(lnumber, line)
                for label, index in (("head", head_i), ("tail", tail_i)):
                    if index == 0:
                        # NOTE(review): index 0 is taken to mean "no
                        # connect atom" (as for terminal residues and
                        # ligands) - confirm against the Amber OFF
                        # format. Previously it wrapped around to the
                        # last atom of the residue.
                        continue
                    if not 1 <= index <= len(atomnames):
                        raise undefined_atoms(head_i, tail_i,
                                              "residueconnect", lnumber)
                    residue.connections.append(
                        "{} {}".format(label, atomnames[index - 1]))

    for residue in residues:
        self._add_residue(residue)

    if not residues:
        raise_or_log("No residues found", QLibError,
                     logger, self.ignore_errors)
def read_ffld(self, ffld_file, qstruct):
    """Read and parse a Macromodel FFLD file for oplsaa parameters.

    Args:
        ffld_file (string): path/name of ffld file
        qstruct (qstructure.QStruct): object created with the same
                                      structure file as the ffld

    Returns:
        overwritten_prms (list): list of overwritten parameters

    The second argument is a QStruct object created with the pdb or mol2
    file that was used to create the ffld file. It's needed to map atoms
    and residues to their names in the structure. The n-th atom line of
    the ffld file is matched with the n-th atom of the structure.

    Same parameters with equal values (duplicates) are overwritten,
    same parameters with different values raise QPrmError, unless
    QPrm.ignore_errors is set to True.

    Created atom-types have the following format: residuename.ATOMNAME
    Eg. asp.CA, pxn.OP

    Raises:
        QPrmError: if the force field type is not 'oplsaa' or if a line
            cannot be parsed. An element mismatch between ffld and
            structure is reported through raise_or_log() (raises or logs,
            depending on self.ignore_errors).
    """
    if self.ff_type != "oplsaa":
        raise QPrmError("Function not supported with "
                        "force field '{}'".format(self.ff_type))

    def parse_error(line):
        return QPrmError("Could not parse line: '{}'".format(line))

    # (prefix of the header line, section it starts)
    section_headers = (
        ("atom type vdw symbol", "ATOMS"),
        ("Stretch k", "BONDS"),
        ("Bending k", "ANGLES"),
        ("proper Torsion", "TORSIONS"),
        ("improper", "IMPROPERS"),
    )

    # keys are ffld atom names, values are atom_types ("resname.atname")
    lookup_aname = {}
    section = None
    prms = {"atoms": [],
            "bonds": [],
            "angles": [],
            "torsions": [],
            "impropers": []}

    # 'with' guarantees the file is closed, also when parsing fails
    with open(ffld_file, 'r') as ffld:
        for line in ffld:
            line = line.strip()
            if not line or "------" in line:
                continue

            new_section = next((sec for prefix, sec in section_headers
                                if line.startswith(prefix)), None)
            if new_section is not None:
                section = new_section
                continue

            lf = line.split()

            if section == "ATOMS":
                #
                #  C1 135 C1 CT -0.0175 3.5000 0.0660 high C: alkanes
                #
                try:
                    name, type_, vdw, symbol = lf[0:4]
                    # the charge is validated but not used for parameters
                    _charge, sigma, epsilon = [float(v) for v in lf[4:7]]
                    comment = "FFLD: {}_{}_{} {}".format(
                        symbol, vdw, type_, " ".join(lf[8:]))
                except (ValueError, IndexError):
                    raise parse_error(line)

                lj_A_i = (4 * epsilon * sigma**12)**0.5
                lj_B_i = (4 * epsilon * sigma**6)**0.5

                # element = letters of the third column
                element = "".join(c for c in vdw if c.isalpha())
                try:
                    mass = ATOM_MASSES[element.upper()]
                except KeyError:
                    logger.warning("Mass for element '{}' (atom '{}') "
                                   "not found, set it manually."
                                   .format(element, name))
                    mass = "<FIX>"

                aindex_struct = len(lookup_aname)
                atom_struct = qstruct.atoms[aindex_struct]
                residue_struct = atom_struct.residue

                # check if element from ffld matches the one in the
                # structure (just the first letters)
                if name[0].lower() != atom_struct.name[0].lower():
                    raise_or_log(
                        "Atom element mismatch, possible wrong "
                        "order of atoms: '{}' (struct) '{}' (ffld)"
                        .format(atom_struct.name, name),
                        QPrmError, logger, self.ignore_errors)

                # make unique atom_type
                atom_name = atom_struct.name
                residue_name = residue_struct.name.lower()
                atype = "{}.{}".format(residue_name, atom_name)

                atom_type = _PrmAtom(atype, mass, lj_A=lj_A_i, lj_B=lj_B_i,
                                     comment=comment)
                prms["atoms"].append(atom_type)

                # add atom name: atom type
                lookup_aname[name] = atype

            elif section == "BONDS":
                #
                #  C1 H2 340.00000 1.09000 high 140 0 CT -HC ==> CT -HC
                #
                try:
                    anames = lf[0:2]
                    fc, r0 = [float(v) for v in lf[2:4]]
                    comment = "FFLD: " + " ".join(lf[4:])
                except (ValueError, IndexError):
                    raise parse_error(line)

                atypes = [lookup_aname[aname] for aname in anames]
                # force constant from the ffld file is doubled
                bond = _PrmBond(atypes, fc * 2.0, r0, comment=comment)
                prms["bonds"].append(bond)

            elif section == "ANGLES":
                #
                #  H2 C1 H3 33.00000 107.80000 high ...
                #
                try:
                    anames = lf[0:3]
                    fc, theta0 = [float(v) for v in lf[3:5]]
                    comment = "FFLD: " + " ".join(lf[5:])
                except (ValueError, IndexError):
                    raise parse_error(line)

                atypes = [lookup_aname[aname] for aname in anames]
                # force constant from the ffld file is doubled
                angle = _PrmAngle(atypes, fc * 2.0, theta0, comment=comment)
                prms["angles"].append(angle)

            elif section == "TORSIONS":
                #
                #  O2 P1 O3 C4 0.000 0.000 0.562 0.000 high ...
                #
                try:
                    anames = lf[0:4]
                    # a real list: conversion errors must surface here,
                    # inside the try, also when map() is lazy
                    fcs = [float(v) for v in lf[4:8]]
                    comment = "FFLD: " + " ".join(lf[8:])
                except (ValueError, IndexError):
                    raise parse_error(line)

                multiplicity = (1.0, 2.0, 3.0, 4.0)
                phi0s = (0.0, 180.0, 0.0, 180.0)
                paths = (1.0, 1.0, 1.0, 1.0)

                atypes = [lookup_aname[aname] for aname in anames]
                torsion = _PrmTorsion(atypes, comment=comment)
                # the four columns are the terms with multiplicity 1-4;
                # (practically) zero terms are left out
                for fc, nmin, phi0, path in zip(fcs, multiplicity,
                                                phi0s, paths):
                    if abs(fc) > 0.000001:
                        torsion.add_prm(fc / 2.0, nmin, phi0, path)

                # every torsion gets at least one (zero) term
                if not torsion.get_prms():
                    torsion.add_prm(0.0, 1.0, 0.0, 1.0)

                prms["torsions"].append(torsion)

            elif section == "IMPROPERS":
                #
                #  C21 C22 C20 O19 2.200 high ...
                #
                try:
                    anames = lf[0:4]
                    fc = float(lf[4])
                    comment = "FFLD: " + " ".join(lf[5:])
                except (ValueError, IndexError):
                    raise parse_error(line)

                atypes = [lookup_aname[aname] for aname in anames]
                # third atom on the line is the center atom
                center_atom = atypes.pop(2)
                improper = _PrmImproper(center_atom, atypes, fc / 2.0,
                                        180.0, multiplicity=2,
                                        comment=comment)
                prms["impropers"].append(improper)

    # add everything to the parameter set and collect the duplicates
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    dups = []
    for type_, params in prms.items():
        for prm in params:
            dup = self._add_prm(prm)
            if dup is not None:
                dups.append(dup)
    return dups
def read_amber_parm(self, parm_fn):
    """Read and parse an Amber parameter file (.parm).

    The file is read section by section, in a fixed order: title line,
    masses, one skipped line, bonds, angles, torsions, impropers, two
    skipped lines, equivalent nonbonded types, one skipped line and the
    nonbonded parameters. Every section ends with a blank line.

    Args:
        parm_fn (string): name of parameter file

    Returns:
        overwritten_prms (list): list of overwritten parameters

    Same parameters with equal values (duplicates) are overwritten,
    same parameters with different values raise QPrmError, unless
    QPrm.ignore_errors is set to True.

    At the moment, all comments are ignored.

    Raises:
        QPrmError: if the force field type is not 'amber', if a line
            cannot be parsed, if a torsion term is rejected by
            add_prm() with ValueError, or if a nonbonded atom type has
            no mass. Conflicting masses for the same type are reported
            through raise_or_log() (raises or logs, depending on
            self.ignore_errors).
    """
    if self.ff_type != "amber":
        raise QPrmError("Function not supported with "
                        "force field '{}'".format(self.ff_type))

    def parse_error(line):
        return QPrmError("Could not parse line '{}'".format(line))

    def section_lines(parmf):
        # yield the stripped lines of a section, up to (and consuming)
        # the terminating blank line or the end of the file
        while True:
            line = parmf.readline().strip()
            if not line:
                return
            yield line

    def atom_types(line, count, wildcards=False):
        # atom types sit in 2-character columns, every third character
        # (0-2, 3-5, 6-8, ...); '*' is spelled out as 'star'
        a_types = [line[i:i + 2].strip().replace("*", "star")
                   for i in range(0, 3 * count, 3)]
        if wildcards:
            # 'X' is a wildcard, written as '?' here; a real list is
            # needed because the caller may pop() from it
            a_types = ["?" if a == "X" else a for a in a_types]
        return a_types

    def multiplicity_of(line):
        try:
            return float(line[45:60])
        except ValueError:
            # some files have extra text in these columns, retry with
            # the narrower field
            return float(line[45:55])

    prms = {"atoms": [],
            "bonds": [],
            "angles": [],
            "torsions": [],
            "impropers": []}

    with open(parm_fn) as parmf:
        # line with FF description
        parmf.readline()

        # get the masses
        for line in section_lines(parmf):
            fields = line.split()
            try:
                atom_name = fields[0].replace("*", "star")
                mass = float(fields[1])
            except (ValueError, IndexError):
                raise parse_error(fields)

            try:
                old_mass = self.amber_masses[atom_name]
            except KeyError:
                self.amber_masses[atom_name] = mass
            else:
                if abs(mass - old_mass) > 0.00001:
                    raise_or_log("Different masses for same type ({}): "
                                 "{}, {}"
                                 .format(atom_name, mass, old_mass),
                                 QPrmError, logger, self.ignore_errors)

        # useless line (hydrophilic atoms?)
        parmf.readline()

        # get the bond types
        for line in section_lines(parmf):
            try:
                a_types = atom_types(line, 2)
                fc, r0 = float(line[5:15]), float(line[15:25])
            except ValueError:
                raise parse_error(line)
            fc *= 2   # In Q the fc is divided by 2
            prms["bonds"].append(_PrmBond(a_types, fc, r0))

        # get the angle types
        for line in section_lines(parmf):
            try:
                a_types = atom_types(line, 3)
                fc, t0 = float(line[8:18]), float(line[18:28])
            except ValueError:
                raise parse_error(line)
            fc *= 2   # In Q the fc is divided by 2
            prms["angles"].append(_PrmAngle(a_types, fc, t0))

        # get the torsion (dihedral) types
        for line in section_lines(parmf):
            try:
                a_types = atom_types(line, 4, wildcards=True)
                npaths = float(line[11:15])
                fc, phase = float(line[15:30]), float(line[30:45])
                multiplicity = multiplicity_of(line)
            except ValueError:
                raise parse_error(line)

            torsion = _PrmTorsion(a_types)

            # torsion parameters belonging to the same torsion
            # must be sequential in the parameter file,
            # otherwise there is no way of knowing which is correct
            if prms["torsions"] and \
                    torsion.prm_id == prms["torsions"][-1].prm_id:
                torsion = prms["torsions"][-1]
            else:
                prms["torsions"].append(torsion)

            try:
                torsion.add_prm(fc, multiplicity, phase, npaths)
            # in case of two parms sharing same multiplicity
            except ValueError:
                raise QPrmError("Duplicate parameter found: {}"
                                .format(torsion))

        # get the improper types
        for line in section_lines(parmf):
            try:
                a_types = atom_types(line, 4, wildcards=True)
                # third atom on the line is the center atom
                center_atom = a_types.pop(2)
                fc, phi0 = float(line[15:30]), float(line[30:45])
                multiplicity = multiplicity_of(line)
            except ValueError:
                raise parse_error(line)

            improper = _PrmImproper(center_atom, a_types, fc, phi0,
                                    multiplicity=multiplicity)
            prms["impropers"].append(improper)

        # two useless lines
        parmf.readline()
        parmf.readline()

        # list of same parameters: the first type on a line maps to the
        # whole line (itself included)
        same_types = {}
        for line in section_lines(parmf):
            fields = line.replace("*", "star").split()
            same_types[fields[0]] = fields

        # another useless line (MOD4 RE)
        parmf.readline()

        # get the nonbonding parameters
        for line in section_lines(parmf):
            fields = line.split()
            try:
                a_type = fields[0].replace("*", "star")
                lj_R, lj_eps = [float(v) for v in fields[1:3]]
            except (ValueError, IndexError):
                raise parse_error(fields)
            same_a_types = same_types.get(a_type, [a_type])

            try:
                mass = self.amber_masses[a_type]
            except KeyError:
                raise QPrmError("No mass for atom type: {}".format(a_type))

            # equivalent types share the parameters (and the mass) of
            # the type listed in this section
            for same_a_type in same_a_types:
                atom_type = _PrmAtom(same_a_type, mass,
                                     lj_R=lj_R, lj_eps=lj_eps)
                prms["atoms"].append(atom_type)

    # add everything to the parameter set and collect the duplicates
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    dups = []
    for type_, params in prms.items():
        for prm in params:
            dup = self._add_prm(prm)
            if dup is not None:
                dups.append(dup)
    return dups