def test_read_mol2_unsafe(self):
    """Load the malformed mol2 file with ignore_errors=True.

    Parsing must not raise; atom and molecule counts have to match the
    clean file, while the malformed file yields two additional residues
    (bad indexes are skipped, so the result is not trustworthy).
    """
    tolerant = QStruct("data/all_amino_acids_bad.mol2", "mol2",
                       ignore_errors=True)
    reference = QStruct("data/all_amino_acids.mol2", "mol2")

    n_atoms = len(reference.atoms)
    n_residues = len(reference.residues)
    n_molecules = len(reference.molecules)

    assert n_atoms == len(tolerant.atoms)
    assert n_residues == len(tolerant.residues) - 2
    assert n_molecules == len(tolerant.molecules)
def test_types_ffld(self):
    """Spot-check parameter values read from the ffld11 file.

    One atom type, one bond, one angle and one torsion are compared
    against hard-coded reference numbers.
    """
    structure = QStruct("data/ace_ash_nma.pdb", "pdb")
    params = QPrm("oplsaa")
    params.read_ffld("data/ace_ash_nma.ffld11", structure)
    print(list(params.torsions.keys()))

    # reference Lennard-Jones values for the 'nma.N' atom type
    expected_lj_a = (4 * 0.17 * ((3.25)**12))**0.5
    expected_lj_b = (4 * 0.17 * ((3.25)**6))**0.5
    atom_type = params.atom_types["nma.N"]
    assert is_close(atom_type.lj_A, expected_lj_a)
    assert is_close(atom_type.lj_B, expected_lj_b)

    ch3_bond = params.bonds["ace.C ace.CH3"]
    assert is_close(ch3_bond.fc / 2.0, 317.0)
    assert is_close(ch3_bond.r0, 1.522)

    angle = params.angles["ash.C ash.CA ash.CB"]
    assert is_close(angle.fc / 2.0, 63.0)
    assert is_close(angle.theta0, 111.1)

    torsion = params.torsions["ace.C ash.N ash.CA ash.C"]
    # index-based access keeps the IndexError of a too-short term list
    for idx, doubled_fc in enumerate((-2.365, 0.912, -0.850)):
        assert is_close(torsion.fcs[idx] * 2.0, doubled_fc)
    for idx, multiplicity in enumerate((1.0, 2.0, 3.0)):
        assert is_close(torsion.multiplicities[idx], multiplicity)
    for idx, phase in enumerate((0.0, 180.0, 0.0)):
        assert is_close(torsion.phases[idx], phase)
def test_read_ffld(self):
    """Check how many parameters of each kind QPrm reads from ffld11."""
    structure = QStruct("data/ace_ash_nma.pdb", "pdb")
    params = QPrm("oplsaa")
    params.read_ffld("data/ace_ash_nma.ffld11", structure)

    expected_counts = (
        ("atom_types", 25),
        ("bonds", 24),
        ("angles", 40),
        ("torsions", 49),
        ("impropers", 5),
    )
    for attribute, count in expected_counts:
        assert len(getattr(params, attribute)) == count
def test_read_mol2(self):
    """Check the basics of a parsed mol2 file: names and object counts."""
    structure = QStruct("data/all_amino_acids.mol2", "mol2")

    first_atom_names = " ".join(atom.name for atom in structure.atoms[0:5])
    first_residue_names = " ".join(res.name
                                   for res in structure.residues[0:3])
    assert "HH31 CH3 HH32 HH33 C" == first_atom_names
    assert "ACE ALA ARG" == first_residue_names

    assert len(structure.atoms) == 456
    assert len(structure.residues) == 30
    assert len(structure.molecules) == 1
def test_convert_placeholders(self):
    """Placeholders of existing atoms are replaced by atom indexes.

    Strings that only resemble placeholders ('$$', '$AAA$', '$.AA$', ...)
    must be left untouched, while '$2.CB$', '$3.CA$' and '$LAST.ID$' are
    replaced with 11, 21 and 25 for this PDB structure.

    The result is compared token by token (str.split), the same way the
    mol2 variant of this test does it, so the check does not depend on
    the exact amount of whitespace surrounding the substituted indexes.
    """
    inp_string = """ $$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$ $2.CB$ $3.CA$ $LAST.ID$ """
    out_string = """ $$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$ 11 21 25 """.split()
    qstruct = QStruct("data/ace_ash_nma.pdb", "pdb")
    assert qstruct.convert_placeholders(inp_string).split() == out_string
def test_read_ffld(self):
    """Check the residues QLib builds from the ffld11 file."""
    library = QLib("oplsaa")
    structure = QStruct("data/ace_ash_nma.pdb", "pdb")
    library.read_ffld("data/ace_ash_nma.ffld11", structure)

    residues = library.residue_dict
    assert len(residues) == 3
    for res_name, n_atoms in (("ACE", 6), ("ASH", 13), ("NMA", 6)):
        assert len(residues[res_name].atoms) == n_atoms

    ash = residues["ASH"]
    second_atom = ash.atoms[1]
    assert second_atom.atom_type == "ash.CA"
    assert is_close(second_atom.charge, 0.14)
    for connection in ("tail C", "head N"):
        assert connection in ash.connections
def test_convert_placeholders(self):
    """Placeholder conversion on the mol2 structure.

    Valid placeholders are replaced with atom indexes (compared token by
    token), and a placeholder that cannot be resolved raises QStructError.
    """
    template = """ $$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$ $2.CB$ $3.CA$ $LAST.ID$ """
    expected_tokens = """ $$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$ 11 19 456 """.split()
    structure = QStruct("data/all_amino_acids.mol2", "mol2")

    converted = structure.convert_placeholders(template)
    assert converted.split() == expected_tokens

    # FAIL
    with pytest.raises(QStructError):
        structure.convert_placeholders("$1.CB$")
def __init__(self, qcalc_exec, calcdirs, pdb_file, en_list_fn,
             lambdas_A, lambdas_B, resid_first, resid_last,
             scale_ionized, nthreads, qmask=None):
    """Store the settings and prepare the (empty) result containers.

    The PDB file is parsed right away; a QStructError raised by the parser
    is re-raised as QGroupContribError. The calculation directories are
    stored as relative paths. The statistics container is created with
    column names that embed the first values of lambdas_A and lambdas_B
    and the scale_ionized value.
    """
    self._en_list_fn = en_list_fn
    self._qcalc_exec = qcalc_exec

    try:
        structure = QStruct(pdb_file, "pdb")
    except QStructError as error_msg:
        message = "Can't parse PDB file '{}': {}".format(pdb_file, error_msg)
        raise QGroupContribError(message)
    self._pdb_qstruct = structure

    self._calcdirs = list(map(os.path.relpath, calcdirs))
    self._nthreads = nthreads
    self._lambdas_A = lambdas_A
    self._lambdas_B = lambdas_B
    self._resid_first = resid_first
    self._resid_last = resid_last
    self._scale_ionized = scale_ionized
    self._qmask = qmask

    # containers filled in later
    self._qcalc_io = ODict()
    self.gcs = ODict()
    self.failed = ODict()
    self.qcalc_version = None
    self.kill_event = threading.Event()

    # header of the statistics table
    lam_from = lambdas_A[0]
    lam_to = lambdas_B[0]
    scale = self._scale_ionized
    header = ["Residue id", "Residue name", "N"]
    header.append("VdW(l={:5.4f}->l={:5.4f})_mean".format(lam_from, lam_to))
    header.append("VdW(l={:5.4f}->l={:5.4f})_stdev".format(lam_from, lam_to))
    header.append("El(l={:5.4f}->l={:5.4f})_(scale={})_mean"
                  "".format(lam_from, lam_to, scale))
    header.append("El(l={:5.4f}->l={:5.4f})_(scale={})_stdev"
                  "".format(lam_from, lam_to, scale))
    self.gcs_stats = DataContainer(header)
def test_read_pdb_fail(self):
    """Parsing a PDB with bad atom indexes has to raise QStructError."""
    bad_pdb = "data/all_amino_acids_bad.pdb"
    with pytest.raises(QStructError):
        QStruct(bad_pdb, "pdb")
def genfeps(fep_proc_file, relax_input_file, restraint, energy_list_fn,
            frames, repeats, fromlambda, prefix, first_frame_eq,
            pdb_file=None, fep_file=None, runscript_file=None,
            ignore_errors=False):
    """Generates inputs for a FEP/MD simulation with Q (qdyn5).

    Arguments:
        fep_proc_file (string):  genfeps procedure file pathname
        relax_input_file (string):  pathname of last relaxation step input
        restraint (string):  restraint coordinate (a)
        energy_list_fn (string):  name of file that will contain the en.f.list
        frames (int):  number of FEP frames (at least 2)
        repeats (int):  number of repeats/replicas
        fromlambda (float):  starting lambda (0.0 - 1.0), None to use the
                             lambda from the relaxation input
        prefix (string):  prefix for repeat directories
        first_frame_eq (boolean):  use equil instead of first frame (cadee)

    Optional arguments:
        pdb_file (string):  pdb pathname (used to convert placeholders)
        fep_file (string):  alternate fep file pathname (ignoring input's fep)
        runscript_file (string):  slurm/sge run script
        ignore_errors (boolean):  passed to QStruct and QDynInp - write to
                                  logger instead of raising exceptions on
                                  non-critical things

    Returns:
        rep_dirs (list):  list of created replica folders

    Raises:
        QGenfepsError

    (a) Restraint coordinate can be set to:
        'inp' - whatever is defined in relax_input_file
        'top' - topology
        'relax' - endpoint of relaxation
    """
    frames = int(frames)
    repeats = int(repeats)
    if fromlambda is not None:
        fromlambda = float(fromlambda)

    # constants
    PREFIX_EQ = "equil_"
    PREFIX_FEP = "fep_"

    # check if files exist (optional files are skipped when not given)
    for fn in (pdb_file, fep_proc_file, fep_file, runscript_file,
               relax_input_file):
        if fn and not os.path.lexists(fn):
            raise QGenfepsError("File '{}' doesn't exist.".format(fn))

    if restraint not in ["top", "relax", "inp"]:
        raise QGenfepsError("Argument 'restraint' has to be either "
                            "'inp', 'top' or 'relax'")

    # find and replace atom placeholders.
    # if no PDB was given to replace them, exit
    with open(fep_proc_file, 'r') as proc_f:
        fep_proc_str = proc_f.read()
    placeholders = find_placeholders(fep_proc_str)
    if placeholders and not pdb_file:
        raise QGenfepsError("Found placeholders in proc. file, but no PDB "
                            "was given: {}".format(", ".join(placeholders)))
    elif placeholders:
        logger.info("These placeholders will be replaced with atom indices: "
                    + ", ".join(placeholders))
        try:
            qstruct = QStruct(pdb_file, "pdb", ignore_errors=ignore_errors)
            fep_proc_str = qstruct.convert_placeholders(fep_proc_str)
        except QStructError as err_msg:
            raise QGenfepsError("Failed to replace placeholders: {}"
                                "".format(err_msg))

    # header comment that is prepended to each generated input file
    header_comment = (
        "# Generated with QTools, version {}\n"
        "# Date: {}\n"
        "# CWD: {}\n"
        "# Cmdline: {}\n"
    ).format(__version__, time.ctime(), os.getcwd(), " ".join(sys.argv))

    # get topology and fep and others from the last relaxation input
    top_file_abs, fep_file_abs, re_file_abs, rest_file = None, None, None, None
    lambda_initial = None
    with open(relax_input_file, 'r') as relax_f:
        relax_inp_str = relax_f.read()
    try:
        relax_inp = QDynInput(relax_inp_str, ignore_errors=ignore_errors)
    except QDynInputError as err_msg:
        raise QGenfepsError("There is something wrong with the given input "
                            "file ({}): {}".format(relax_input_file, err_msg))

    di = os.path.dirname(relax_input_file)
    try:
        files = relax_inp.parameters["files"]
        lambda_initial = float(relax_inp.parameters["lambdas"].split()[0])
        top_file_abs = os.path.join(di, files["topology"])
        re_file_abs = os.path.join(di, files["final"])
        fep_file_abs = os.path.join(di, files["fep"])
        if "restraint" in files:
            rest_file = os.path.join(di, files["restraint"])
    except KeyError as err_msg:
        raise QGenfepsError("Parsing the relaxation input file failed, "
                            "keyword missing... {}".format(err_msg))

    # check if the files actually exist
    for fn, descr in [(top_file_abs, "topology"), (fep_file_abs, "fep"),
                      (re_file_abs, "final"), (rest_file, "restraint")]:
        if fn and not os.path.lexists(fn):
            raise QGenfepsError("When parsing the input, found this filename "
                                "'{}' next to the '{}' command. Unfortunately,"
                                " the file doesnt exist...".format(fn, descr))

    # change the FEP (when debugging your system, you might want to
    # use an old relax and not waste 100M core hours when changing
    # a soft core value in the fep)
    if fep_file:
        fep_file_abs = os.path.abspath(fep_file)

    # find and replace atom placeholders in FEP file
    # if no PDB was given to replace them, exit
    with open(fep_file_abs, 'r') as fep_f:
        fep_file_str = fep_f.read()
    placeholders = find_placeholders(fep_file_str)
    if placeholders and not pdb_file:
        raise QGenfepsError("Found placeholders in FEP file, but no PDB was "
                            "given: {}".format(", ".join(placeholders)))
    elif placeholders:
        logger.info("Replacing FEP file placeholders...")
        try:
            qstruct = QStruct(pdb_file, "pdb", ignore_errors=ignore_errors)
            fep_file_str = qstruct.convert_placeholders(fep_file_str)
        except QStructError as err_msg:
            raise QGenfepsError("Failed to replace placeholders: {}"
                                "".format(err_msg))

    # change the inital lambda (this is not recommended, the system should
    # be properly relaxed at a particual lambda before doing FEP)
    if fromlambda is not None:
        lambda_initial = fromlambda
    # validate the effective starting lambda, wherever it came from
    if lambda_initial > 1.0 or lambda_initial < 0.0:
        raise QGenfepsError("Lambda value is bogus, are you on drugs?")

    # a lambda grid needs both end points (frames - 1 is a divisor below)
    if frames < 2:
        raise QGenfepsError("At least 2 FEP frames are required, got {}."
                            "".format(frames))

    # create lambda values, find the closest to the starting one and
    # rearrange accordingly: [0.0, 0.02, 0.04, ... 0.98, 1.0]  for frames==51
    lambdas = [float(num) / (frames - 1) for num in range(frames)]

    # index of the grid point closest to the starting lambda,
    # 2 for lambda_initial == 0.04 (or close to 0.04) and frames==51;
    # on a tie the lower index wins
    l_i = min(range(frames), key=lambda i: abs(lambdas[i] - lambda_initial))
    lambda_initial = lambdas[l_i]

    # [0.02, 0.0,] for the case of lambda_initial == 0.04 and frames == 51
    forward_lambdas = list(reversed(lambdas[0:l_i]))
    # [0.06, 0.08, ..., 1.0] for the case of lambda_initial == 0.04, fr. == 51
    backward_lambdas = lambdas[l_i + 1:]

    lambdas = [lambda_initial, ] + forward_lambdas + backward_lambdas

    # print out some useful information
    logger.info("Using restart file: {}"
                "".format(os.path.relpath(re_file_abs)))
    logger.info("Using topology file: {}"
                "".format(os.path.relpath(top_file_abs)))
    logger.info("Using FEP file: {}"
                "".format(os.path.relpath(fep_file_abs)))
    logger.info("Starting from lambda value (state 1): {}"
                "".format(lambda_initial))
    logger.info("Number of FEP frames: {} ".format(frames))

    # create a temporary directory to store the files that are identical
    # in all replicas - top, fep, runscript, relax restart, restraint file
    # (if any) and copy the common files
    TMPDIR = tempfile.mkdtemp()
    # the try/finally makes sure the directory is removed on errors as well
    try:
        top_fn = os.path.basename(top_file_abs)
        fep_fn = os.path.basename(fep_file_abs)
        relax_re_fn = "cont_" + os.path.basename(re_file_abs)
        shutil.copy2(top_file_abs, TMPDIR)
        shutil.copy2(re_file_abs, os.path.join(TMPDIR, relax_re_fn))
        with open(os.path.join(TMPDIR, fep_fn), "w") as fep_out:
            fep_out.write(fep_file_str)
        if runscript_file:
            shutil.copy2(runscript_file, TMPDIR)
        else:
            logger.info("No Q runscript given.")

        # handle the whole restraint coordinates crap...
        # rest_file is the file from the relaxation input (if any)
        # rest_fn is the basename of the restraints file (either from input
        # or relaxed.re.rest), or None if rest. to topology
        if restraint == "relax":
            logger.info("Restraining to: relaxation")
            rest_fn = "cont_" + os.path.basename(re_file_abs) + ".rest"
            shutil.copy2(re_file_abs, os.path.join(TMPDIR, rest_fn))
        elif restraint == "top":
            logger.info("Restraining to: topology")
            rest_fn = None
        else:  # default, from input
            if rest_file:
                logger.info("Restraining to: {} (from input)"
                            "".format(os.path.relpath(rest_file)))
                rest_fn = "cont_" + os.path.basename(rest_file)
                shutil.copy2(rest_file, os.path.join(TMPDIR, rest_fn))
            else:
                logger.info("Restraining to: topology (from input)")
                rest_fn = None

        # name the restraint file would have if it was taken from the
        # relaxation input (None == restrained to topology)
        if rest_file:
            relax_rest_fn = "cont_" + os.path.basename(rest_file)
        else:
            relax_rest_fn = None

        # parse the proc file
        gen_inp_s, eq_steps_inps_s, fep_inp_s = \
            _genfeps_parse_proc(fep_proc_str)

        ####################
        # make equil. inputs
        eq_steps = []
        for step_n, eq_step_inp_s in enumerate(eq_steps_inps_s):
            # create the files section
            final = "{}{:03d}_{:4.3f}.re".format(PREFIX_EQ, step_n,
                                                 lambda_initial)
            dcd = "{}{:03d}_{:4.3f}.dcd".format(PREFIX_EQ, step_n,
                                                lambda_initial)
            files = {"final": final,
                     "trajectory": dcd,
                     "topology": top_fn,
                     "fep": fep_fn}

            if first_frame_eq:
                files["energy"] = "{}{:03d}_{:4.3f}.en".format(
                    PREFIX_EQ, step_n, lambda_initial)

            if rest_fn:
                files["restraint"] = rest_fn

            if step_n != 0:
                files["restart"] = "{}{:03d}_{:4.3f}.re".format(
                    PREFIX_EQ, step_n - 1, lambda_initial)
            else:
                files["restart"] = relax_re_fn

            # parse the general input and update with step input and
            # files section
            try:
                inp = QDynInput(gen_inp_s, ignore_errors=ignore_errors)
                inp.update(eq_step_inp_s)
                if "energy" in inp.parameters["intervals"]:
                    files["energy"] = "{}{:03d}_{:4.3f}.en".format(
                        PREFIX_EQ, step_n, lambda_initial)
                elif first_frame_eq:
                    raise QGenfepsError("Argument 'first_frame_eq' requires the "
                                        "energy printout defined in the intervals "
                                        "section of the equilibration "
                                        "(e.g. 'energy 10')")

                inp.update(parameters={"files": files})
                inp.update(parameters={
                    "lambdas": "{:9.7f} {:9.7f}"
                               "".format(lambda_initial, 1 - lambda_initial)
                })
            except QDynInputError as err_msg:
                raise QGenfepsError("Problem with equil. step no. {}: {}"
                                    "".format(step_n, err_msg))

            # test the input string
            try:
                _ = inp.get_string()
            except QDynInputError as err_msg:
                raise QGenfepsError("Error in equil. step {}: {}"
                                    "".format(step_n, err_msg))

            # check if random seed is not defined or is fixed in the
            # first step
            if step_n == 0:
                if repeats > 1:
                    if ("random_seed" not in inp.parameters["md"]) or \
                            (int(inp.parameters["md"]["random_seed"]) > 0):
                        raise QGenfepsError("Fixed random seed (or restart "
                                            "velocities) works only with one "
                                            "repeat (others will be identical).\n"
                                            "Please use 'random_seed -1' in "
                                            "your first equilibration step to "
                                            "generate random random seeds.")

                elif "random_seed" not in inp.parameters["md"]:
                    logger.info("No random seed in first step of "
                                "equilibration, using restart velocities.")

                    # rest_fn carries the 'cont_' prefix, so it has to be
                    # compared with the prefixed name of the relaxation
                    # restraint, otherwise 'inp' would always warn
                    if rest_fn != relax_rest_fn:
                        logger.warning("This will not be a true continuation run! "
                                       "The relaxation restraint does not match "
                                       "yours. Use 'inp' instead of 'top' or "
                                       "'relax' for the restraint.")

            # append the input
            eq_steps.append(inp)

        #################
        # make FEP inputs
        en_filenames = []
        feps = []

        for step_n, lam in enumerate(lambdas):
            # create the files section
            final = "{}{:03d}_{:4.3f}.re".format(PREFIX_FEP, step_n, lam)
            dcd = "{}{:03d}_{:4.3f}.dcd".format(PREFIX_FEP, step_n, lam)
            en = "{}{:03d}_{:4.3f}.en".format(PREFIX_FEP, step_n, lam)
            files = {"final": final,
                     "trajectory": dcd,
                     "topology": top_fn,
                     "energy": en,
                     "fep": fep_fn}

            # if this step is in new direction (backwards) then
            # set the previous lambda and step to initial
            if backward_lambdas and lam == backward_lambdas[0]:
                prev_fep = feps[0]
            elif step_n == 0:
                prev_fep = eq_steps[-1]
            else:
                prev_fep = feps[-1]

            # if this flag is set, all steps that point to the first step
            # should point to the last eq step
            if first_frame_eq:
                if step_n == 1 or \
                        (backward_lambdas and lam == backward_lambdas[0]):
                    prev_fep = eq_steps[-1]

            if rest_fn:
                files["restraint"] = rest_fn

            files["restart"] = prev_fep.parameters["files"]["final"]

            # update the parameters and check the input
            try:
                inp = QDynInput(gen_inp_s, ignore_errors=ignore_errors)
                inp.update(fep_inp_s)
                if "energy" not in inp.parameters["intervals"]:
                    raise QGenfepsError("FEP stage requires the energy printout "
                                        "defined in the intervals section "
                                        "(e.g. 'energy 10')")

                inp.update(parameters={"files": files})
                inp.update(parameters={
                    "lambdas": "{:9.7f} {:9.7f}"
                               "".format(lam, 1 - lam)
                })
                inp.check()
            except QDynInputError as err_msg:
                raise QGenfepsError("Error in FEP step {}: {}"
                                    "".format(step_n, err_msg))

            # append the input
            feps.append(inp)

            # add the energy filename to the list
            en_filenames.append(inp.parameters["files"]["energy"])

        # if first_frame_eq is set add the energy file and remove the
        # first fep frame
        if first_frame_eq:
            logger.info("Replacing the first FEP frame with the last "
                        "equilibration step")
            en_filenames[0] = eq_steps[-1].parameters["files"]["energy"]
            feps.pop(0)

        # check random seed in fep
        if "random_seed" in feps[0].parameters["md"] and \
                int(feps[0].parameters["md"]["random_seed"]) < 1:
            logger.warning("Generating random seeds in FEP inputs. "
                           "Are you sure this is ok?")

        # write a file that contains the names of all energy files in
        # proper order. this file is used later by q_mapper.py
        # sort the enfiles according to lambda (1.0 -> 0.0) so that the
        # mapping will always go from reactants to products
        enfiles_lambdas = sorted([(enf.split("_")[-1], i)
                                  for i, enf in enumerate(en_filenames)],
                                 reverse=True)
        en_filenames_sorted = [en_filenames[i] for _, i in enfiles_lambdas]
        with open(os.path.join(TMPDIR, energy_list_fn), 'w') as en_list_f:
            en_list_f.write("\n".join(en_filenames_sorted))

        # create directories for repeats/replicas (rep_000,rep_001,rep_002...)
        # copy everything from TMPDIR (topology, fep file, relax restart and
        # restraint file (if any)); create the eq and fep inputs
        #
        # first check for existing directories
        for num in range(repeats):
            rep = "{}{:03d}".format(prefix, num)
            if os.path.lexists(rep):
                raise QGenfepsError("Directory '{}' exists. Please (re)move it or "
                                    "change the prefix with --prefix.".format(rep))

        lsdir = os.listdir(TMPDIR)
        rep_dirs = []
        for num in range(repeats):
            rep = "{}{:03d}".format(prefix, num)
            os.mkdir(rep)
            # copy stuff from TMPDIR
            for f in lsdir:
                shutil.copy2(os.path.join(TMPDIR, f), rep)

            # create eq inputs
            for step_n, eq_step in enumerate(eq_steps):
                # a seed below 1 means 'generate a random one', every
                # replica gets its own (work on a copy of the template)
                eqs = copy.deepcopy(eq_step)
                if "random_seed" in eqs.parameters["md"] and \
                        int(eqs.parameters["md"]["random_seed"]) < 1:
                    rs = random.randint(1, 10**6)
                    eqs.update(parameters={"md": {"random_seed": rs}})

                try:
                    s = eqs.get_string()
                except QDynInputError as err_msg:
                    raise QGenfepsError("Error in step {}: {}"
                                        "".format(step_n, err_msg))
                fn = os.path.join(rep, "{}{:03d}_{:4.3f}.inp"
                                       "".format(PREFIX_EQ, step_n,
                                                 lambda_initial))
                with open(fn, 'w') as inp_f:
                    inp_f.write(header_comment + s)

            # create FEP inputs
            for step_n, fep in enumerate(feps):
                # the first FEP frame was dropped, shift the numbering
                if first_frame_eq:
                    step_n += 1
                fs = copy.deepcopy(fep)
                if "random_seed" in fs.parameters["md"] and \
                        int(fs.parameters["md"]["random_seed"]) < 1:
                    rs = random.randint(1, 10**6)
                    fs.update(parameters={"md": {"random_seed": rs}})

                try:
                    s = fs.get_string()
                except QDynInputError as err_msg:
                    raise QGenfepsError("Error in step {}: {}"
                                        "".format(step_n, err_msg))
                lam = lambdas[step_n]  # feps was created in lambdas iteration
                fn = os.path.join(rep, "{}{:03d}_{:4.3f}.inp"
                                       "".format(PREFIX_FEP, step_n, lam))
                with open(fn, 'w') as inp_f:
                    inp_f.write(header_comment + s)

            logger.info("Created inputs for repeat/replica '{}'.".format(rep))
            rep_dirs.append(rep)

        # get the amount of storage that will be wasted
        _genfeps_log_storage(os.path.join(TMPDIR, top_fn),
                             eq_steps + feps, repeats)
    finally:
        # remove temporary directory
        shutil.rmtree(TMPDIR)

    return rep_dirs


def _genfeps_parse_proc(fep_proc_str):
    """Split a genfeps procedure string into its input sections.

    Returns a tuple (general input string, list of equilibration step
    input strings, FEP input string) with the variables defined in the
    SCRIPT_VARS section already substituted. Raises QGenfepsError on lines
    outside of a section, unsupported sections and missing sections.
    """
    general_inp = []
    eq_steps_inps = [[], ]
    fep_inp = []
    script_vars = {}

    section = ""
    for line in fep_proc_str.split("\n"):
        # remove comments and strip whitespaces.
        line = re.split(r"#|!", line)[0].strip()
        # empty lines are useless
        if line == "":
            continue
        # found a section
        if line[0] == "{":
            section = line.strip("{}").lower()
            continue

        if not section:
            raise QGenfepsError("Parsing the procedure file failed... This "
                                "line: '{}' is not inside any section:"
                                "".format(line))

        if section == "script_vars":
            fields = line.split()
            script_vars[fields[0]] = " ".join(fields[1:])
        elif section == "general":
            general_inp.append(line)
        elif section == "steps_equil":
            # a line of underscores separates the equilibration steps
            if "__________" in line:
                eq_steps_inps.append([])
            else:
                eq_steps_inps[-1].append(line)
        elif section == "fep":
            fep_inp.append(line)
        else:
            raise QGenfepsError("Parsing the procedure file failed: "
                                "Unsupported section: '{}'".format(section))

    # remove steps with no parameters (too many _________ lines)
    eq_steps_inps = [step for step in eq_steps_inps if step]

    # check for missing sections
    for lines, name in ((general_inp, "GENERAL"),
                        (eq_steps_inps, "STEPS_EQUIL"),
                        (fep_inp, "FEP")):
        if not lines:
            raise QGenfepsError("Parsing the procedure file failed: "
                                "Section '{}' is missing".format(name))

    # join lists of lines to strings and replace the placeholders
    # (in reverse sorted order of the variable names)
    gen_inp_s = "\n".join(general_inp)
    fep_inp_s = "\n".join(fep_inp)
    eq_steps_inps_s = ["\n".join(step) for step in eq_steps_inps]

    for placeholder, value in sorted(script_vars.items(), reverse=True):
        gen_inp_s = gen_inp_s.replace(placeholder, value)
        fep_inp_s = fep_inp_s.replace(placeholder, value)
        eq_steps_inps_s = [step_s.replace(placeholder, value)
                           for step_s in eq_steps_inps_s]

    return gen_inp_s, eq_steps_inps_s, fep_inp_s


def _genfeps_log_storage(top_path, steps, repeats):
    """Log a rough estimate of the storage the given MD steps will use.

    The atom count is read from the beginning of the topology file; if it
    cannot be found a warning is logged and the estimate is skipped.
    """
    num_atoms_all = None
    with open(top_path, 'r') as top_f:
        # the atom count is expected near the top of the file
        for line in top_f.readlines(1024):
            if "no. of atoms, no. of solute atoms" in line:
                num_atoms_all = int(line.split()[0])
                break
    if num_atoms_all is None:
        logger.warning("Could not find the number of atoms in the topology, "
                       "skipping the storage estimate.")
        return

    REST_B_PER_ATOM = 48.0
    TRJ_B_PER_ATOM = 12.0
    # very rough estimate, depends on Q version
    # it can double if group_contributions are calculated
    EN_B_PER_STEP = 370.0
    # float, so that integer byte counts are not floor-divided on Python 2
    CONV_MB = float(2**20)

    # very rough estimate
    OUT_B_PER_STEP = 2000
    TEMP_B_PER_STEP = 160
    NB_B_PER_STEP = 80

    # intervals maps: q_parameter_key, q_default_value, approx_bytes_per_frame
    qintervals = {
        "trj": ["trajectory", 100, num_atoms_all * TRJ_B_PER_ATOM],
        "log": ["output", 10, OUT_B_PER_STEP],
        "temp": ["temperature", 10, TEMP_B_PER_STEP],
        "en": ["energy", 10, EN_B_PER_STEP],
        "nb": ["non_bond", 10, NB_B_PER_STEP]
    }
    total_data = {"trj": 0, "log": 0, "en": 0, "rest": 0}

    # calculate approx amount of data
    for step in steps:
        data = {}
        mdsteps = int(step.parameters["md"]["steps"])
        for k, v in qintervals.items():
            interval_key, default_interval, bytes_per_step = v
            try:
                interval = int(step.parameters["intervals"][interval_key])
                # number of printouts is a whole number (floor division)
                data[k] = mdsteps // interval * bytes_per_step
            except KeyError:  # default
                data[k] = mdsteps // default_interval * bytes_per_step
            except ZeroDivisionError:
                data[k] = 0  # no printout
            # if energy or trajectory, check that files for output are
            # defined, otherwise set the printout to 0
            if interval_key in ("energy", "trajectory") and \
                    interval_key not in step.parameters["files"].keys():
                data[k] = 0

        total_data["trj"] += data["trj"]
        total_data["log"] += data["log"] + data["temp"] + data["nb"]
        total_data["en"] += data["en"]
        total_data["rest"] += num_atoms_all * REST_B_PER_ATOM

    logger.info("Your runs will waste approx. {:.2f} MB of storage. "
                "Per replica: {:.2f} MB (trj: {:.1f}, log: {:.1f}, "
                "en: {:.1f}, rest: {:.1f})".format(
                    sum(total_data.values()) / CONV_MB * repeats,
                    sum(total_data.values()) / CONV_MB,
                    total_data["trj"] / CONV_MB,
                    total_data["log"] / CONV_MB,
                    total_data["en"] / CONV_MB,
                    total_data["rest"] / CONV_MB))
def test_read_ffld_wrong_order_fail(self):
    """Reading ffld11 against a PDB with wrong atom order raises QLibError."""
    library = QLib("oplsaa")
    misordered = QStruct("data/ace_ash_nma_bad.pdb", "pdb")
    ffld_path = "data/ace_ash_nma.ffld11"
    with pytest.raises(QLibError):
        library.read_ffld(ffld_path, misordered)
def test_ff14sb_conversion():
    """Amber14FF to Qamber14 conversion.

    Convert Amber14 lib (+prepin for impropers) and parm+frcmod to Q lib/prm.
    Load the structure 'all_amino_acids.pdb' and build the topology.
    Check the total bonding energy contributions and number of bonding terms
    and compare the library and parameter set with official qamber14.
    """
    qal = QLib("amber")
    qap = QPrm("amber", ignore_errors=True)  # duplicates
    qal.read_amber_lib("data/ff-amber14/amber12_mod.lib")
    qal.read_amber_lib("data/ff-amber14/arn.lib")
    qal.read_prepin_impropers("data/ff-amber14/prep/amino12.in")
    qal.read_prepin_impropers("data/ff-amber14/arn.prepi")
    qap.read_amber_parm("data/ff-amber14/parm/parm10.dat")
    qap.read_amber_frcmod("data/ff-amber14/parm/frcmod.ff14SB")

    # add options to parameters (one 'key value...' pair per line)
    for line in """\
name Q-Amber14SB
type AMBER
vdw_rule arithmetic !vdW combination rule (geometric or arithmetic)
scale_14 0.8333 ! electrostatic 1-4 scaling factor
switch_atoms off
improper_potential periodic
improper_definition explicit\
""".splitlines():
        lf = line.split()
        qap.options[lf[0]] = " ".join(lf[1:])

    # remove head from ACE and tail from NME
    cons = qal.residue_dict["ACE"].connections
    cons = [con for con in cons if "head" not in con]
    qal.residue_dict["ACE"].connections = cons

    cons = qal.residue_dict["NME"].connections
    cons = [con for con in cons if "tail" not in con]
    qal.residue_dict["NME"].connections = cons

    qas1 = QStruct("data/all_amino_acids.pdb", "pdb", ignore_errors=True)
    qat = QTopology(qal, qap, qas1)
    q_tors = sum([len(list(tor.prm.get_prms())) for tor in qat.torsions])

    assert len(qat.bonds) == 464
    assert len(qat.angles) == 829
    assert len(qat.torsions) == 1221
    assert q_tors == 1950
    assert len(qat.impropers) == 102

    be = sum([bond.calc()[0] for bond in qat.bonds])
    ae = sum([ang.calc()[0] for ang in qat.angles])
    te = sum([tor.calc()[0] for tor in qat.torsions])
    ie = sum([imp.calc()[0] for imp in qat.impropers])

    assert is_close(be, 181.2572830)
    assert is_close(ae, 212.8539304)
    assert is_close(te, 417.2919960)
    assert is_close(ie, 22.8171235)

    # compare with official lib; the reference files are closed right
    # after reading instead of being left to the garbage collector
    with open("data/qamber14.lib", "r") as lib_file:
        qa14_lib = lib_file.read()
    with open("data/qamber14.prm", "r") as prm_file:
        qa14_prm = prm_file.read()

    assert qal.get_string() in qa14_lib
    assert qap.get_string() in qa14_prm
def test_convert_oplsaa(self):
    """The library generated from ffld11 equals the stored reference file."""
    qlib = QLib("oplsaa")
    qstruct = QStruct("data/ace_ash_nma.pdb", "pdb")
    qlib.read_ffld("data/ace_ash_nma.ffld11", qstruct)
    ql_str = qlib.get_string()

    # read the reference with a context manager so the handle is closed
    with open("data/ace_ash_nma.lib") as reference_file:
        reference = reference_file.read()
    assert ql_str == reference
def test_read_ffld_fail(self):
    """Feeding a PDB file to read_ffld (no residues found) raises QLibError."""
    library = QLib("oplsaa")
    pdb_path = "data/ace_ash_nma.pdb"
    structure = QStruct(pdb_path, "pdb")
    with pytest.raises(QLibError):
        library.read_ffld(pdb_path, structure)
# Load all Amber parm files into the parameter set; the first file that
# raises QPrmError ends the script with exit status 1.
for parm in args.parms:
    try:
        qprm.read_amber_parm(parm)
    except QPrmError as e:
        print("FATAL! Problem with parm: {}".format(str(e)))
        sys.exit(1)

# Same for the frcmod files.
for frcmod in args.frcmods:
    try:
        qprm.read_amber_frcmod(frcmod)
    except QPrmError as e:
        print("FATAL! Problem with frcmod: {}".format(str(e)))
        sys.exit(1)

# Parse the structure from the first mol2 argument.
try:
    qstruct = QStruct(args.mol2[0], "mol2")
except QStructError as e:
    print("FATAL! Problem with mol2: {}".format(str(e)))
    sys.exit(1)

# Build the topology from the library, the parameters and the structure.
try:
    qtop = QTopology(qlib, qprm, qstruct)
except QTopologyError as e:
    print("FATAL! Problem building the topology: {}".format(str(e)))
    sys.exit(1)

#
# get total and max energies and number of parameters
#
# four independent, initially empty dictionaries
data, total_e, max_e, nprm = {}, {}, {}, {}
def test_read_pdb_fail3(self):
    """Parsing a mol2 file as PDB (random input) raises QStructError."""
    not_a_pdb = "data/all_amino_acids_bad.mol2"
    with pytest.raises(QStructError):
        QStruct(not_a_pdb, "pdb")
qap.options[lf[0]] = " ".join(lf[1:]) # remove head from ACE and tail from NME cons = qal.residue_dict["ACE"].connections cons = [con for con in cons if "head" not in con] qal.residue_dict["ACE"].connections = cons cons = qal.residue_dict["NME"].connections cons = [con for con in cons if "tail" not in con] qal.residue_dict["NME"].connections = cons open("qamber14_gen.lib", "w").write(qal.get_string()) open("qamber14_gen.prm", "w").write(qap.get_string()) print "# Topology with converted parameters (and mol2):" qas1 = QStruct("../all_amino_acids.mol2", "mol2") qat = QTopology(qal, qap, qas1) q_tors = sum([len(list(tor.prm.get_prms())) for tor in qat.torsions]) print "Bonds: ", len(qat.bonds) print "Angles: ", len(qat.angles) print "Torsions: ", len(qat.torsions) print "Q Torsions (diff parms): ", q_tors print "Impropers: ", len(qat.impropers) print "Bond energy: ", sum([bond.calc()[0] for bond in qat.bonds]) print "Angle energy: ", sum([ang.calc()[0] for ang in qat.angles]) print "Torsion energy: ", sum([tor.calc()[0] for tor in qat.torsions]) print "Improper energy: ", sum([imp.calc()[0] for imp in qat.impropers]) print print "# Topology with pre-made parameters (and pdb):" qal2 = QLib("amber")
def test_convert_placeholders_fail(self):
    """A placeholder that cannot be resolved raises QStructError."""
    structure = QStruct("data/ace_ash_nma.pdb", "pdb")
    unresolvable = "$1.CB$"
    with pytest.raises(QStructError):
        structure.convert_placeholders(unresolvable)
def make_fep(qmap_file, pdb_file, forcefield, parm_files, lib_files,
             ignore_errors=False):
    """Generate a template FEP file for EVB simulations in Q.

    Parses a QMAP file (see below), state 1 structure file (PDB) and
    all states libraries and parameters, and determines the changes
    in connectivity/charges/parameters that occur between the states.

    QMAP is a text file that defines mappings of library ids (for each state)
    to state 1 structure/topology ids, best explained on an example:

    q   315.O     OHH.O     OHH.O
    q   315.H1    OHH.H1    HIP.HE2
    q   315.H2    OHH.H2    OHH.H2
    q   155.NE2   HID.NE2   HIP.NE2
    ...
    n   155.CA    HID.CA    HIP.CA

    The first column defines the atom as being a 'Q' atom or a 'neighbouring'
    atom. The latter will not be included in the 'Q-region' but will be
    included in the 'change_bonds/change_angles...' sections in case there is a
    change in bonding/parameters outside the Q region. Additionally, you can
    define a 'q' atom with 'q_qcp', which will create an additional section for
    isotopically clean masses used in QCP calculations.
    The second column is the PDB ID, comprised of residue index and atom name,
    separated by a dot.
    The third column is the library ID of this atom in state 1, comprised of
    residue name and atom name (should be the same as in the structure).
    The fourth column is the library ID of this atom in state 2.
    Additional columns can be added for other states.

    Text following '#', '*' or '!' on a QMAP line is treated as a comment.

    The returned template string contains several missing parts, denoted with
    <FIX>, which have to be manually replaced with appropriate values. These
    include the softpair C parameters, Morse parameters, Hij parameters.

    Args:
        qmap_file (string): QMAP file path
        pdb_file (string): state 1 PDB file path (the one built with qprep)
        forcefield (string): forcefield type (see SUPPORTED_FF)
        parm_files (list): Q parameter-file paths
        lib_files (list): Q library-file paths
        ignore_errors (boolean, optional): don't fail on certain non critical
                                           errors

    Returns:
        fepstr (string): fepfile template

    Raises:
        QMakeFepError
    """

    if forcefield not in SUPPORTED_FF:
        raise QMakeFepError("Force field '{}' not supported. Use {}"
                            "".format(forcefield, " or ".join(SUPPORTED_FF)))

    fep_types = {
        "atoms": [],
        "bonds": [],
        "angles": [],
        "torsions": [],
        "impropers": []
    }
    fep_changes = {
        "atoms": [],
        "charges": [],
        "bonds": ODict(),
        "angles": ODict(),
        "torsions": ODict(),
        "impropers": ODict()
    }
    fep_qcp_atoms = []
    fep_morse_prms = {}
    fep_reacting_atoms = set()
    num_evb_states = None

    # parse the MAP file
    # pdb_ids_map = [ ('q', [pdbid1_state1,]),
    #                 ('q', [pdbid2_state1,]),
    #                 ...
    #                 ('n', [pdbid11_state1,]),
    #                 ...
    #               ]
    # lib_ids_map = [ [lib_id1_state1, lib_id2_state1...],
    #                 [lib_id1_state2, lib_id2_state2...],
    #                 ...
    #               ]
    #
    lib_ids_map = []
    pdb_ids_map = []
    # PDB IDs already seen, regardless of whether they were 'q' or 'n'
    seen_pdb_ids = set()
    with open(qmap_file, 'r') as qatom_map:
        for line in qatom_map:
            # remove comments
            line = re.split(r"#|\*|\!", line, 1)[0].strip()
            if line == "":
                continue
            c = line.split()
            atom_type = c[0].lower()
            if atom_type not in ["q", "n", "q_qcp"]:
                raise QMakeFepError("Lines in the QMAP file should begin "
                                    "with either 'q' (qatom) or 'n' "
                                    "(neighboring atom) or 'q_qcp' "
                                    "(QPI q atom)")
            # a usable line needs the type, the PDB ID and at least one
            # library ID (state 1)
            if len(c) < 3:
                raise QMakeFepError("Invalid QMAP line '{}'. Expected at "
                                    "least three columns: TYPE PDB_ID "
                                    "LIB_ID_STATE1 [LIB_ID_STATE2 ...]"
                                    "".format(line))
            pdb_id = c[1]
            lib_ids = c[2:]

            try:
                resid, name = pdb_id.split(".")
                # int() rejects non-numeric residue indexes, and a residue
                # index of 0 is treated as invalid as well
                if not name or not int(resid):
                    raise ValueError
            except ValueError:
                raise QMakeFepError("Invalid PDB ID '{}'. Should be "
                                    "RESID.ATOMNAME".format(pdb_id))

            if pdb_id in seen_pdb_ids:
                raise QMakeFepError("Duplicate PDB ID: '{}'".format(pdb_id))
            seen_pdb_ids.add(pdb_id)
            pdb_ids_map.append((atom_type, [pdb_id, ]))

            if num_evb_states is None:
                num_evb_states = len(lib_ids)
            elif len(lib_ids) != num_evb_states:
                raise QMakeFepError("Number of states in line '{}' not equal "
                                    "to number of PDB files".format(line))

            for state, lib_id in enumerate(lib_ids):
                try:
                    resname, name = lib_id.split(".")
                    if not resname or not name:
                        raise ValueError
                except ValueError:
                    raise QMakeFepError("Invalid library ID '{}'. Should be "
                                        "RESNAME.ATOMNAME".format(lib_id))
                try:
                    if lib_id in lib_ids_map[state]:
                        raise QMakeFepError("The library IDs in one EVB state "
                                            "should be unique (double '{}'), "
                                            "otherwise proper bonding can't "
                                            "be determined.".format(lib_id))
                except IndexError:
                    # first atom seen for this state
                    lib_ids_map.append([])

                lib_ids_map[state].append(lib_id)

    if not pdb_ids_map:
        # without this check num_evb_states stays None and range() below fails
        raise QMakeFepError("QMAP file '{}' does not define any atoms."
                            "".format(qmap_file))

    # load libraries
    qlib = QLib(forcefield, ignore_errors=ignore_errors)
    for lib in lib_files:
        try:
            qlib.read_lib(lib)
        except QLibError as e:
            raise QMakeFepError("Problem parsing lib ({}): {}"
                                "".format(lib, e))

    # make dummy structures for other states
    structures = [None for _ in range(num_evb_states)]
    structures[0] = pdb_file
    libid_pdbid_map = [{} for _ in range(num_evb_states)]

    for state in range(1, num_evb_states):
        state_structure = []
        atom_index = 1
        processed_residues = []
        for i, (q_or_n, pdb_ids_all_states) in enumerate(pdb_ids_map):

            lib_id = lib_ids_map[state][i]
            resname, aname = lib_id.split(".")

            # add all atoms of current residue to the dummy structure
            # at the same time, storing the mapping lib_id:pdb_id
            # in libid_pdbid_map for later
            if resname not in processed_residues:
                try:
                    residue_lib = qlib.residue_dict[resname]
                except KeyError:
                    raise QMakeFepError("Residue '{}' not found in library."
                                        "".format(resname))
                processed_residues.append(resname)
                res_index = len(processed_residues)

                for atom in residue_lib.atoms:
                    lib_id2 = "{}.{}".format(resname, atom.name)
                    pdb_id2 = "{}.{}".format(res_index, atom.name)
                    # fixed-width PDB ATOM record; the padding after the
                    # residue index makes the coordinates start at column 31
                    state_structure.append("{:<6}{:5} {:4} {:3} {:5}    "
                                           "{:8.3f}{:8.3f}{:8.3f}"
                                           "".format("ATOM", atom_index,
                                                     atom.name, resname,
                                                     res_index, 0, 0, 0))
                    atom_index += 1

                    # map the newly created dummy atom's pdb_id to lib_id
                    libid_pdbid_map[state][lib_id2] = pdb_id2

            # add pdb_id of current atom in current (dummy structure)
            # state to pdb_ids_map (using its lib_id)
            try:
                pdb_id_this_state = libid_pdbid_map[state][lib_id]
            except KeyError:
                raise QMakeFepError(
                    "Library ID '{}' not valid.".format(lib_id))
            pdb_ids_all_states.append(pdb_id_this_state)

        # mkstemp() returns an OPEN descriptor; wrap it so that both the
        # descriptor and the file object get closed after writing.
        # NOTE(review): the temporary file itself is never removed.
        fd, structures[state] = tempfile.mkstemp()
        with os.fdopen(fd, "w") as dummy_pdb:
            dummy_pdb.write("\n".join(state_structure))

    # load parameters
    qprm = QPrm(forcefield, ignore_errors=ignore_errors)
    for parm in parm_files:
        try:
            qprm.read_prm(parm)
        except QPrmError as e:
            raise QMakeFepError("Problem with parm ({}): {}"
                                "".format(parm, e))

    # load structures and make topologies
    topologies = []
    for state in range(num_evb_states):
        try:
            qstruct = QStruct(structures[state], "pdb",
                              ignore_errors=ignore_errors)
        except QStructError as e:
            raise QMakeFepError("Problem parsing PDB file ({}): {} "
                                "".format(structures[state], e))

        try:
            topologies.append(QTopology(qlib, qprm, qstruct))
        except QTopologyError as e:
            raise QMakeFepError("Problem building the topology: {}"
                                "".format(e))

    # Make _TopoAtom (atoms in QTopology) maps out of qmap's lists
    # and extract types, type changes and charge changes
    #
    # atom_map = [ [_TopoAtom1_state1, _TopoAtom1_state2, ... ],
    #              [_TopoAtom2_state1, _TopoAtom2_state2, ... ],
    #              [_TopoAtom3_state1, _TopoAtom3_state2, ... ],
    #            ...
    #            ]
    atom_map = []
    for i, (q_or_n, pdb_id_all_states) in enumerate(pdb_ids_map):
        atom_all_states = []
        for state, pdb_id in enumerate(pdb_id_all_states):
            residue, aname = pdb_id.split(".")
            try:
                residue = topologies[state].residues[int(residue) - 1]
                atom = [a for a in residue.atoms if a.name == aname][0]
            except (KeyError, IndexError):
                raise QMakeFepError("Atom '{}' doesn't exist in PDB '{}'"
                                    "".format(pdb_id, structures[state]))
            atom_all_states.append(atom)
        atom_map.append(atom_all_states)

        # check for stupidity - lib_id in QMAP state 1
        # not matching the structure/topology
        lib_id_qmap = lib_ids_map[0][i]
        lib_id = "{}.{}".format(atom_all_states[0].residue.name,
                                atom_all_states[0].name)
        if lib_id != lib_id_qmap:
            pdb_id = pdb_ids_map[i][1][0]
            raise QMakeFepError("QMAP state 1 library ID ({}) of atom '{}' "
                                "doesn't match topology library ID ({})."
                                "".format(lib_id_qmap, pdb_id, lib_id))

        # For Q atoms (and not the neighbor atoms):
        # get FEP atom types, type changes and charge changes
        if q_or_n in ["q", "q_qcp"]:
            for atom in atom_all_states:
                if atom.prm not in fep_types["atoms"]:
                    fep_types["atoms"].append(atom.prm)

            fep_changes["charges"].append([a.charge for a in atom_all_states])
            fep_changes["atoms"].append(atom_all_states)
            if q_or_n == "q_qcp":
                fep_qcp_atoms.append(atom_all_states)

    charge_sums = []
    for state in range(num_evb_states):
        charge_sum = sum([c[state] for c in fep_changes["charges"]])
        if abs(round(charge_sum) - charge_sum) > 1e-6:
            raise_or_log("Net charge in state {} not integer: {}"
                         "".format(state + 1, charge_sum),
                         QMakeFepError, logger, ignore_errors=ignore_errors)
        charge_sums.append(charge_sum)

    if any([abs(c - charge_sums[0]) > 1e-6 for c in charge_sums]):
        logger.warning("Net charge changes between states: {}"
                       "".format(" -> ".join([str(c) for c in charge_sums])))

    # get all Bonds, Angles, Torsions and Impropers which include
    # at least one atom defined in qmap
    batis = {
        "bonds": [set() for _ in range(num_evb_states)],
        "angles": [set() for _ in range(num_evb_states)],
        "torsions": [set() for _ in range(num_evb_states)],
        "impropers": [set() for _ in range(num_evb_states)]
    }
    for atom_all_states in atom_map:
        for state, atom in enumerate(atom_all_states):
            batis["bonds"][state].update(atom.bonds)
            batis["angles"][state].update(atom.angles)
            batis["torsions"][state].update(atom.torsions)
            batis["impropers"][state].update(atom.impropers)

    # map the bonds,angles,torsions,impropers (bati) in different states
    # to same key (ordered list of state1 PDB_IDs)
    #
    # bati_map =
    # { "bonds": {state1_bond1_key: [bond1_state1, bond1_state2,...],
    #             state1_bond2_key: [bond2_state1, bond2_state2,...], ...},
    #   "angles": {state1_angle1_key: [angle1_state1, angle1_state2,...],...}
    #  ... }
    #
    # also, include only batis which have all atoms defined in qmap
    # also, detect inter-residue batis and raise QMakeFepError

    bati_map = {"bonds": {}, "angles": {}, "torsions": {}, "impropers": {}}
    for state in range(num_evb_states):
        atoms_in_state = [a_all_st[state] for a_all_st in atom_map]
        for bati_type in bati_map:
            for bati in batis[bati_type][state]:

                # find the corresponding atoms in state1
                try:
                    atoms_st1 = [
                        atom_map[atoms_in_state.index(a)][0]
                        for a in bati.atoms
                    ]
                except ValueError:
                    # one of the Atoms is not defined in QMAP
                    continue

                pdbid_index = []
                for atom in atoms_st1:
                    pdbid_index.append(
                        (atom.index, "{}.{}".format(atom.residue.index,
                                                    atom.name)))
                # order the pdbids to prevent double entries
                if bati_type == "bonds":
                    pids = sorted(pdbid_index)
                elif bati_type == "angles":
                    pids = min(pdbid_index, list(reversed(pdbid_index)))
                elif bati_type == "torsions":
                    pids = min(pdbid_index, list(reversed(pdbid_index)))
                elif bati_type == "impropers":
                    # topology order == library order == correct order
                    pids = pdbid_index
                key = " ".join([p[1] for p in pids])

                # check for bonds/angles/torsions/impropers that are
                # shared between residues
                residue_ids = set(atom.residue.index for atom in bati.atoms)
                if len(residue_ids) > 1:
                    raise QMakeFepError("Inter-residue bond/angle/torsion '{}'"
                                        " not supported. Combine the residues "
                                        "into a single library entry if you "
                                        "want to make changes over the "
                                        "'head-tail' bond.".format(key))

                # add bati to bati_map
                try:
                    bati_map[bati_type][key][state] = bati
                except KeyError:
                    bati_map[bati_type][key] = [
                        None for _ in range(num_evb_states)
                    ]
                    bati_map[bati_type][key][state] = bati

    def _bati_sort(key, bati_all_states):
        # to sort bonds/angles.. based on the key
        # also, breaking and forming bonds have priority
        try:
            return (-1 * bati_all_states.index(None), key)
        except ValueError:
            # present in every state
            return (1, key)

    # find changes between states (add to fep_changes dict)
    for bati_type, batis_of_type in bati_map.items():
        sorted_batis = sorted(batis_of_type.items(),
                              key=lambda item: _bati_sort(item[0], item[1]))
        for bati_key, bati_all_states in sorted_batis:

            # bond/angle/.. breaking or forming
            if None in bati_all_states:
                fep_changes[bati_type][bati_key] = bati_all_states

                # add bond atoms to "reactive atoms" set
                # and replace the bond parameter with a Morse type
                if bati_type == "bonds":
                    for bati in bati_all_states:
                        if bati is not None:
                            fep_reacting_atoms |= set(bati.atoms)
                            # the bond parameter is replaced with a Morse
                            # parameter (_FepPrmMorse)
                            prm_id = bati.prm.prm_id
                            try:
                                bati.prm = fep_morse_prms[prm_id]
                            except KeyError:
                                bati.prm = _FepPrmMorse(bati.prm)
                                fep_morse_prms[prm_id] = bati.prm

            # the actual values of the parameters are not exactly the same
            else:
                tmp = [
                    bati_all_states[0].prm.strval == bati.prm.strval
                    for bati in bati_all_states
                ]
                if not all(tmp):
                    fep_changes[bati_type][bati_key] = bati_all_states

    # add parameters of changing batis to fep_types
    for bati_type in bati_map:
        for bati_all_states in fep_changes[bati_type].values():
            prms = [bati.prm for bati in bati_all_states if bati is not None]
            for prm in prms:
                if prm not in fep_types[bati_type]:
                    fep_types[bati_type].append(prm)

    # add reactive atoms from states that have bond==None to fep_reacting_atoms
    for atom_all_states in fep_changes["atoms"]:
        for atom in atom_all_states:
            if atom in fep_reacting_atoms:
                fep_reacting_atoms |= set(atom_all_states)

    ########################
    # Prepare the output
    ########################

    fep_l = {
        "atoms": [],
        "atom_types": [],
        "qcp_mass": [],
        "change_atoms": [],
        "change_charges": [],
        "soft_pairs": [],
        "off_diagonals": [],
        "bond_types": [],
        "change_bonds": [],
        "angle_types": [],
        "change_angles": [],
        "torsion_types": [],
        "change_torsions": [],
        "improper_types": [],
        "change_impropers": []
    }

    ####################
    # ATOMS
    # CHANGE_ATOMS
    # CHANGE_CHARGES
    ####################
    format_atoms = "{:<15} {:<10} # {:<15} {:<15} {:>3}"
    format_ch_atoms = "{:<10} " + " {:<12}" * num_evb_states + " # {:<}"
    format_ch_crgs = "{:<10} " + " {:12}" * num_evb_states + " # {:<10}" \
        + " {:>12}" * (num_evb_states - 1)
    format_qcp = "{:<15} {:<10} # {:<10}"

    fep_l["atoms"].append(
        format_atoms.format("#Q index", "PDB index", "St.1 PDB_ID",
                            "St.1 LIB_ID", ""))

    tmp = ["#Q index"]
    tmp.extend(["Type st.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_ID")
    fep_l["change_atoms"].append(format_ch_atoms.format(*tmp))

    tmp = ["#Q index"]
    tmp.extend(["Charge st.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_ID")
    tmp.extend(
        ["dq({}->{})".format(n + 1, n + 2) for n in range(num_evb_states - 1)])
    fep_l["change_charges"].append(format_ch_crgs.format(*tmp))

    if fep_qcp_atoms:
        fep_l["qcp_mass"].append("[qcp_mass]")
        fep_l["qcp_mass"].append(
            format_qcp.format("#Q index", "Mass", "St.1 PDB_ID"))

    for i, atom_all_states in enumerate(fep_changes["atoms"]):
        q_index = i + 1
        a = atom_all_states[0]
        pdb_id = "{}.{}".format(a.residue.index, a.name)
        lib_id = "{}.{}".format(a.residue.name, a.name)

        # atoms; flag the atom if it is reacting in any of the states
        if any(atom in fep_reacting_atoms for atom in atom_all_states):
            reacting_flag = " !"
        else:
            reacting_flag = ""
        fep_l["atoms"].append(
            format_atoms.format(q_index, "$" + pdb_id + "$", pdb_id, lib_id,
                                reacting_flag))

        # change_atoms
        tmp = [q_index] + [a.prm.prm_id for a in atom_all_states] + [pdb_id]
        fep_l["change_atoms"].append(format_ch_atoms.format(*tmp))

        # charges
        crgs = [float(a.charge) for a in atom_all_states]
        tmp = [q_index] + crgs + [pdb_id] \
            + [crgs[n + 1] - crgs[n] for n in range(num_evb_states - 1)]
        fep_l["change_charges"].append(format_ch_crgs.format(*tmp))

        # qcp_atoms
        if atom_all_states in fep_qcp_atoms:
            fep_l["qcp_mass"].append(
                format_qcp.format(q_index, "<FIX>", pdb_id))

    ###############
    # ATOM_TYPES
    ###############
    format_atypes = "{:<12} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10}"
    if forcefield == "amber":
        fep_l["atom_types"].append(
            format_atypes.format("#Atom_type", "LJ_Rm", "LJ_eps", "SP_Ci",
                                 "SP_ai", "LJ_Rm", "LJ_eps_14", "mass"))
    else:
        fep_l["atom_types"].append(
            format_atypes.format("#Atom_type", "LJ_A", "LJ_B", "SP_Ci",
                                 "SP_ai", "LJ_A_14", "LJ_B_14", "mass"))

    fep_reacting_atoms_prms = [a.prm for a in fep_reacting_atoms]
    for prm in fep_types["atoms"]:
        sp_c = 1
        sp_a = 2.5
        if prm in fep_reacting_atoms_prms:
            # soft-pair C of reacting atom types has to be set by the user
            sp_c = "<FIX>"
        if forcefield == "amber":
            lj1, lj2 = prm.lj_R, prm.lj_eps
            lj3, lj4 = lj1, round(lj2 / 1.2, 4)
        else:
            lj1, lj2 = prm.lj_A, prm.lj_B
            lj3, lj4 = round(lj1 / (2**0.5), 4), round(lj2 / (2**0.5), 4)
        fep_l["atom_types"].append(
            format_atypes.format(prm.prm_id, lj1, lj2, sp_c, sp_a, lj3, lj4,
                                 prm.mass))

    ###############
    # BOND_TYPES
    ###############
    format_hbonds = "{:<8} {:>10} {:>10} # {}"
    format_mbonds = "{:<8} {:^10} {:^10} {:>10} # {}"
    fep_l["bond_types"].append("## Harmonic format")
    fep_l["bond_types"].append(
        format_hbonds.format("#Index", "Fc", "r0", "PRM_ID"))
    fep_l["bond_types"].append("## Morse format")
    fep_l["bond_types"].append(
        format_mbonds.format("#Index", "D", "alpha", "r0", "PRM_ID"))

    for i, bond_type in enumerate(fep_types["bonds"]):
        b_index = i + 1
        if isinstance(bond_type, _FepPrmMorse):
            prm_id = "-".join(bond_type.harmonic_prm.prm_id.split())
            tmp = format_mbonds.format(b_index, "<FIX_D>", "<FIX_a>",
                                       "<FIX_r0>", prm_id)
            fep_l["bond_types"].append(tmp)
        else:
            prm_id = "-".join(bond_type.prm_id.split())
            tmp = format_hbonds.format(b_index, bond_type.fc, bond_type.r0,
                                       prm_id)
            fep_l["bond_types"].append(tmp)

    ###############
    # CHANGE_BONDS
    ###############
    format_bondch = "{:<10} {:<10} " + "{:^5} " * num_evb_states + " # {}"
    tmp = ["#Atom1", "Atom2"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_bonds"].append(format_bondch.format(*tmp))

    for bond_key, bond_all_states in fep_changes["bonds"].items():
        # bond_key == "PDB_ID1 PDB_ID2"
        prm_indexes = []
        for b in bond_all_states:
            if b is None:
                # 0 == bond doesn't exist in this state
                prm_indexes.append(0)
            else:
                btype_index = fep_types["bonds"].index(b.prm) + 1
                prm_indexes.append(btype_index)

        placeholders = ["${}$".format(a) for a in bond_key.split()]
        pdb_id = "-".join(bond_key.split())

        tmp = placeholders + prm_indexes + [pdb_id]
        fep_l["change_bonds"].append(format_bondch.format(*tmp))

    ###############
    # ANGLE_TYPES
    ###############
    format_angles = "{:<8} {:>10} {:>10} # {}"
    fep_l["angle_types"].append(
        format_angles.format("#Index", "Fc", "theta0", "PRM_ID"))
    for i, angle_type in enumerate(fep_types["angles"]):
        an_index = i + 1
        prm_id = "-".join(angle_type.prm_id.split())
        tmp = format_angles.format(an_index, angle_type.fc, angle_type.theta0,
                                   prm_id)
        fep_l["angle_types"].append(tmp)

    #################
    # CHANGE_ANGLES
    #################
    format_angch = "{:<10} {:<10} {:<10} " + "{:^5} " * num_evb_states \
        + " # {}"
    tmp = ["#Atom1", "Atom2", "Atom3"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_angles"].append(format_angch.format(*tmp))

    for angle_key, angle_all_states in fep_changes["angles"].items():
        # angle_key == "PDB_ID1 PDB_ID2 PDB_ID3"
        prm_indexes = []
        for ang in angle_all_states:
            if ang is None:
                prm_indexes.append(0)
            else:
                atype_index = fep_types["angles"].index(ang.prm) + 1
                prm_indexes.append(atype_index)

        placeholders = ["${}$".format(a) for a in angle_key.split()]
        pdb_id = "-".join(angle_key.split())

        tmp = placeholders + prm_indexes + [pdb_id]
        fep_l["change_angles"].append(format_angch.format(*tmp))

    #################
    # TORSION_TYPES
    #################
    format_torsions = "{:<8} {:>10} {:>10} {:>10} # {}"
    fep_l["torsion_types"].append(
        format_torsions.format("#Index", "Fc", "mult", "psi0", "PRM_ID"))
    # every term of a torsion parameter gets its own FEP type index;
    # tor_indexes[k] lists the indexes of the k-th entry in fep_types
    tor_index = 1
    tor_indexes = []
    for torsion_type in fep_types["torsions"]:
        prm_id = "-".join(torsion_type.prm_id.split())
        prm_indexes = []
        for fc, per, psi0, npath in torsion_type.get_prms():
            fc = fc / npath
            tmp = format_torsions.format(tor_index, fc, per, psi0, prm_id)
            fep_l["torsion_types"].append(tmp)
            prm_indexes.append(tor_index)
            tor_index += 1
        tor_indexes.append(prm_indexes)

    ###################
    # CHANGE_TORSIONS
    ###################
    format_torch = "{:<10} {:<10} {:<10} {:<10} " \
        + "{:^5} " * num_evb_states + " # {}"

    tmp = ["#Atom1", "Atom2", "Atom3", "Atom4"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_torsions"].append(format_torch.format(*tmp))

    for torsion_key, torsion_all_states in fep_changes["torsions"].items():
        # torsion_key == "PDB_ID1 PDB_ID2 PDB_ID3 PDB_ID4"
        for state, tor in enumerate(torsion_all_states):
            if tor is None:
                continue

            # one line per torsion term, switched on only in this state
            for i in range(len(tor.prm.fcs)):
                tprm_index = fep_types["torsions"].index(tor.prm)
                ttype_index = tor_indexes[tprm_index][i]

                prm_indexes = [0 for _ in range(len(torsion_all_states))]
                prm_indexes[state] = ttype_index

                placeholders = ["${}$".format(t) for t in torsion_key.split()]
                pdb_id = "-".join(torsion_key.split())

                tmp = placeholders + prm_indexes + [pdb_id]
                fep_l["change_torsions"].append(format_torch.format(*tmp))

    #################
    # IMPROPER_TYPES
    #################
    format_impropers = "{:<8} {:>10} {:>10} # {}"
    fep_l["improper_types"].append(
        format_impropers.format("#Index", "Fc", "phi0", "PRM_ID"))
    for i, improper_type in enumerate(fep_types["impropers"]):
        imp_index = i + 1
        prm_id = "-".join(improper_type.prm_id.split())
        tmp = format_impropers.format(imp_index, improper_type.fc,
                                      improper_type.phi0, prm_id)
        fep_l["improper_types"].append(tmp)

    ###################
    # CHANGE_IMPROPERS
    ###################
    format_impch = "{:<10} {:<10} {:<10} {:<10} " \
        + "{:^5} " * num_evb_states + " # {}"

    tmp = ["#Atom1", "Atom2", "Atom3", "Atom4"]
    tmp.extend(["St.{}".format(n + 1) for n in range(num_evb_states)])
    tmp.append("St.1 PDB_IDs")
    fep_l["change_impropers"].append(format_impch.format(*tmp))

    for improper_key, improper_all_states in \
            fep_changes["impropers"].items():
        # improper_key == "PDB_ID1 PDB_ID2 PDB_ID3 PDB_ID4"
        prm_indexes = []
        for imp in improper_all_states:
            if imp is None:
                prm_indexes.append(0)
            else:
                itype_index = fep_types["impropers"].index(imp.prm) + 1
                prm_indexes.append(itype_index)

        placeholders = ["${}$".format(i) for i in improper_key.split()]
        pdb_id = "-".join(improper_key.split())

        tmp = placeholders + prm_indexes + [pdb_id]
        fep_l["change_impropers"].append(format_impch.format(*tmp))

    ##############
    # SOFT_PAIRS
    ##############
    for bond_key, bond_all_states in fep_changes["bonds"].items():
        # only bonds that break or form get a soft pair
        if None in bond_all_states:
            for state, bond in enumerate(bond_all_states):
                if bond is None:
                    continue
                atoms_in_state = [q_atom_all_states[state]
                                  for q_atom_all_states
                                  in fep_changes["atoms"]]
                a1_qindex = atoms_in_state.index(bond.atoms[0]) + 1
                a2_qindex = atoms_in_state.index(bond.atoms[1]) + 1
                fep_l["soft_pairs"].append("{:10} {:10}".format(
                    a1_qindex, a2_qindex))

    fep_sections = dict((k, "\n".join(v)) for k, v in fep_l.items())

    fepstr = """\
# Generated with Qtools, version {version}
# Date: {date}
# CWD: {cwd}
# CMDline: {cmd}
#

[FEP]
states {states}

[atoms]
{atoms}

[atom_types]
{atom_types}

[change_atoms]
{change_atoms}

[change_charges]
{change_charges}

[soft_pairs]
{soft_pairs}

[off_diagonals]
# State_i State_j Atom1 Atom2 A_ij mu_ij
#
## Example1, Hij=H12=0 (not known in advance)
## 1 2 13 14 0 0
## Example2, Hij=H12=C*exp(-mu * r_13_14) (C=20.0, mu=0.45)
## 1 2 13 14 20.0 0.45
#
<FIX>

[bond_types]
{bond_types}

[change_bonds]
{change_bonds}

[angle_types]
{angle_types}

[change_angles]
{change_angles}

[torsion_types]
{torsion_types}

[change_torsions]
{change_torsions}

[improper_types]
{improper_types}

[change_impropers]
{change_impropers}

{qcp_mass}
""".format(states=num_evb_states,
           date=time.ctime(),
           cmd=" ".join(sys.argv),
           cwd=os.getcwd(),
           version=__version__,
           **fep_sections)

    return fepstr
def test_notsupported():
    """Requesting the 'txt' structure format must raise QStructError."""
    structure_path = "data/all_amino_acids.pdb"
    unsupported_format = "txt"
    with pytest.raises(QStructError):
        QStruct(structure_path, unsupported_format)
def test_read_mol2_fail2(self):
    """A mol2 file with bad residue indexes must fail to parse."""
    broken_mol2 = "data/all_amino_acids_bad2.mol2"
    with pytest.raises(QStructError):
        QStruct(broken_mol2, "mol2")
for k, v in vars(args).iteritems(): if k in ['ffld_output', 'pdb'] and not os.path.lexists(v): print "FATAL! File '{}' doesn't exist.".format(v) sys.exit(1) # # create QLib, QPrm, QStruct and QTopology objects # qlib = QLib("oplsaa", ignore_errors=args.ignore_errors) qprm = QPrm("oplsaa", ignore_errors=args.ignore_errors) try: qstruct = QStruct(args.pdb, "pdb", ignore_errors=args.ignore_errors) except QStructError as err: print "FATAL! Problem with pdb: {}".format(err) sys.exit(1) try: qprm.read_ffld(args.ffld_output, qstruct) except QPrmError as err: print "FATAL! Problem with ffld file: {}".format(err) sys.exit(1) try: qlib.read_ffld(args.ffld_output, qstruct) except QLibError as err: print "FATAL! Problem with ffld file: {}".format(err)
def _stage_relax_files(tmpdir, restraint, top_file, fep_file, runscript_file,
                       cont_file, ignore_errors):
    """Copy topology, restart, restraint, FEP and run script into tmpdir.

    Returns:
        tuple (top_fn, fep_fn, re_fn, rest_fn) of filenames to be used in
        the 'files' section of the generated inputs. fep_fn, re_fn and
        rest_fn are None when they do not apply.

    Raises:
        QGenrelaxError: on conflicting/missing arguments or if the
            continuation input can not be parsed.
    """
    fep_fn = None
    re_fn = None
    rest_fn = None

    # get topology and fep and others from previous input if given (--cont)
    if cont_file:
        if top_file:
            raise QGenrelaxError("'top_file' and 'cont_file' don't like each "
                                 "other. Difficult to continue with a "
                                 "different topology...")
        try:
            with open(cont_file, 'r') as cont:
                cont_inp = QDynInput(cont.read(), ignore_errors=ignore_errors)
        except QDynInputError as err_msg:
            raise QGenrelaxError("There is something wrong with the given "
                                 "input file ({}): {}".format(
                                     cont_file, err_msg))

        cont_files = cont_inp.parameters["files"]
        # filenames in the previous input are resolved relative to its folder
        di = os.path.dirname(cont_file)
        top_fn = cont_files["topology"]
        cont_re_fn = cont_files["final"]
        re_fn = "cont_{}".format(cont_re_fn)
        shutil.copy2(os.path.join(di, top_fn), tmpdir)
        shutil.copy2(os.path.join(di, cont_re_fn),
                     os.path.join(tmpdir, re_fn))

        cont_rest_fn = None
        if restraint == "cont_inp" and "restraint" in cont_files:
            cont_rest_fn = cont_files["restraint"]
            rest_fn = "cont_{}".format(cont_rest_fn)
        elif restraint == "cont_final":
            cont_rest_fn = cont_re_fn
            rest_fn = "cont_{}.rest".format(cont_rest_fn)

        if rest_fn:
            shutil.copy2(os.path.join(di, cont_rest_fn),
                         os.path.join(tmpdir, rest_fn))

        if fep_file:
            logger.warning("Using the fep file '{}', instead of the one "
                           "found in the input".format(fep_file))
            fep_fn = os.path.basename(fep_file)
            shutil.copy2(fep_file, tmpdir)
        else:
            try:
                fep_fn = cont_files["fep"]
            except KeyError:
                logger.info("No FEP file found in the input")
            else:
                shutil.copy2(os.path.join(di, fep_fn), tmpdir)

    # or take the arguments
    else:
        if not top_file:
            raise QGenrelaxError("Please specify the topology file or "
                                 "a previous input for a continuation run.")
        top_fn = os.path.basename(top_file)
        shutil.copy2(top_file, tmpdir)

        # Explicit check instead of catching the AttributeError that
        # os.path.basename(None) raises on Python 2 (TypeError on Python 3).
        if fep_file:
            fep_fn = os.path.basename(fep_file)
            shutil.copy2(fep_file, tmpdir)
        else:
            logger.info("NOTE: No FEP file!")

        if restraint in ["cont_inp", "cont_final"]:
            raise QGenrelaxError("Can't restrain to '{}'. Specify 'cont_file'."
                                 "".format(restraint))

    logger.info("Restraining to: '{}'".format(rest_fn or 'topology'))

    if runscript_file:
        shutil.copy2(runscript_file, tmpdir)
    else:
        logger.info("No submission script was given.")

    return top_fn, fep_fn, re_fn, rest_fn


def _parse_relax_proc(relax_proc_str, relax_proc_file):
    """Split the procedure string into its sections.

    Returns:
        tuple (general_inp, steps_inps, script_vars): list of lines of the
        {general} section, list of non-empty steps (each a list of lines)
        of the {steps} section, and dictionary of {script_vars}.

    Raises:
        QGenrelaxError: if a non-empty line precedes any section header.
    """
    general_inp = []
    steps_inps = [[]]
    script_vars = {}
    section = ""

    for line in relax_proc_str.split("\n"):
        # remove comments and strip whitespaces.
        line = re.split(r"#|!", line)[0].strip()
        # empty lines are useless
        if not line:
            continue
        # found a section
        if line[0] == "{":
            section = line.strip("{}").lower()
            continue

        if not section:
            raise QGenrelaxError("Failed to parse '{}'... this line - '{}' "
                                 "is not inside any section:"
                                 "".format(relax_proc_file, line))

        if section == "script_vars":
            tokens = line.split()
            script_vars[tokens[0]] = " ".join(tokens[1:])
        elif section == "general":
            general_inp.append(line)
        elif section == "steps":
            # a line of underscores separates two steps
            if "__________" in line:
                steps_inps.append([])
            else:
                steps_inps[-1].append(line)

    # drop steps with no parameters (too many _________ lines)
    steps_inps = [step for step in steps_inps if step]
    return general_inp, steps_inps, script_vars


def _log_relax_summary(steps, top_path):
    """Log a per-step table and rough storage estimate for the given steps.

    Arguments:
        steps (list): QDynInput objects of the generated inputs
        top_path (string): pathname of the topology (for the atom count)

    Raises:
        QGenrelaxError: if the atom count is not found in the topology or
            if a step lacks 'steps', 'temperature' or 'stepsize'.
    """
    summary = """
Quick summary
{0:<10} {1:>5} {2:>10} {3:>10} {4:^10} {5:^10} {6:^10} {7:^30} {8:^10} {9:>10}
""".format("Step", "T", "Stepsize", "Steps", "Seq.rest", "Dist.rest",
           "Ang.rest", "Shake", "Rand.Seed", "Data (MB)")
    locale.setlocale(locale.LC_ALL, '')
    restraints = []
    total_time = 0

    # print out how much data this run will produce
    # for this we need the atom count from the topology
    num_atoms_all = None
    with open(top_path, 'r') as top:
        for line in top.readlines(1024):
            if "no. of atoms, no. of solute atoms" in line:
                num_atoms_all = int(line.strip().split()[0])
                break
    if num_atoms_all is None:
        # previously this surfaced as a NameError further down
        raise QGenrelaxError("Could not find the number of atoms in the "
                             "topology '{}'.".format(top_path))

    REST_B_PER_ATOM = 48.0
    TRJ_B_PER_ATOM = 12.0
    # very rough estimate, depends on Q version
    # it can double if group_contributions are calculated
    EN_B_PER_STEP = 370.0
    # float, so that all-integer byte counts (log) are not truncated to
    # whole MB by Python 2 integer division
    CONV_MB = float(2**20)
    # very rough estimate
    OUT_B_PER_STEP = 2000
    TEMP_B_PER_STEP = 160
    NB_B_PER_STEP = 80

    # intervals mapping: q_parameter_key, q_default_value, approx_bytes_per_frame
    qintervals = {
        "trj": ["trajectory", 100, num_atoms_all * TRJ_B_PER_ATOM],
        "log": ["output", 10, OUT_B_PER_STEP],
        "temp": ["temperature", 10, TEMP_B_PER_STEP],
        "en": ["energy", 10, EN_B_PER_STEP],
        "nb": ["non_bond", 10, NB_B_PER_STEP],
    }
    total_data = {"trj": 0, "log": 0, "en": 0, "rest": 0}

    for nstep, step in enumerate(steps, 1):
        try:
            # get md parameters
            mdparms = step.parameters["md"]
            total_time += float(mdparms["stepsize"]) * int(mdparms["steps"])
            random_seed = mdparms.get("random_seed", "")

            # get restraints; identical restraints share one number
            step_rests = {
                "sequence_restraints": [],
                "distance_restraints": [],
                "angle_restraints": [],
            }
            for rest_type in step_rests:
                for seqrest in step.parameters.get(rest_type, []):
                    if seqrest in restraints:
                        rest_num = restraints.index(seqrest) + 1
                        step_rests[rest_type].append(str(rest_num))
                    else:
                        restraints.append(seqrest)
                        step_rests[rest_type].append(str(len(restraints)))
            seq = ",".join(step_rests["sequence_restraints"])
            dist = ",".join(step_rests["distance_restraints"])
            angle = ",".join(step_rests["angle_restraints"])

            # get shake parameters
            shake = []
            # this is a Q default, hopefully it will not change
            if mdparms.get("shake_solvent", "on") == "on":
                shake.append("solvent")
            if mdparms.get("shake_hydrogens", "off") == "on":
                shake.append("hydrogens")
            if mdparms.get("shake_solute", "off") == "on":
                shake.append("solute")
            shake = ",".join(shake)

            # calculate approx amount of data
            data = {}
            mdsteps = int(mdparms["steps"])
            for k, v in qintervals.items():
                interval_key, default_interval, bytes_per_step = v
                try:
                    interval = int(step.parameters["intervals"][interval_key])
                    # whole number of frames
                    data[k] = mdsteps // interval * bytes_per_step
                except KeyError:  # default
                    data[k] = mdsteps // default_interval * bytes_per_step
                except ZeroDivisionError:
                    data[k] = 0  # no printout
                finally:
                    # if energy or trajectory, check that files for output are
                    # defined, otherwise set the printout to 0
                    if interval_key in ("energy", "trajectory") and \
                            interval_key not in step.parameters["files"]:
                        data[k] = 0

            trj_data = data["trj"]
            en_data = data["en"]
            log_data = data["log"] + data["temp"] + data["nb"]
            rest_data = num_atoms_all * REST_B_PER_ATOM
            total_data["trj"] += trj_data
            total_data["log"] += log_data
            total_data["en"] += en_data
            total_data["rest"] += rest_data

            data = (trj_data + log_data + rest_data + en_data) / CONV_MB

            # locale.format_string() replaces locale.format(), which is
            # deprecated and no longer available in Python 3.12+
            summary += "{:<10} {:>5} {:>10} {:>10} {:^10} {:^10} {:^10} "\
                       "{:^30} {:^10} {:>10.2f}\n" \
                       "".format(nstep, mdparms["temperature"],
                                 mdparms["stepsize"],
                                 locale.format_string('%d', mdsteps,
                                                      grouping=True),
                                 seq, dist, angle, shake, random_seed, data)
        except KeyError:
            raise QGenrelaxError("You are missing either 'steps', "
                                 "'temperature' or 'stepsize' in one of your "
                                 "relaxation steps. These parameters are "
                                 "quite important you know...")

    summary += "Restraints:\n"
    for i, rest in enumerate(restraints):
        summary += "{}: {}\n".format(i + 1, rest)

    summary += """
Total time: {} ps
Total wasted storage (wild approximation): \
{:.2f} MB (trj: {:.1f}, log: {:.1f}, en: {:.1f}, rest: {:.1f})
""".format(total_time / 1000.0,
           sum(total_data.values()) / CONV_MB, total_data["trj"] / CONV_MB,
           total_data["log"] / CONV_MB, total_data["en"] / CONV_MB,
           total_data["rest"] / CONV_MB)

    for l in summary.split("\n"):
        logger.info(l)


def genrelax(relax_proc_file, outdir, restraint,
             top_file=None, fep_file=None, runscript_file=None,
             pdb_file=None, cont_file=None, ignore_errors=False):
    """Generates inputs for an MD simulation with Q (qdyn5).

    Arguments:
        relax_proc_file (string):  genrelax procedure file pathname
        outdir (string):  output directory
        restraint (string):  restraint coordinate (a)

    Optional arguments (b)
        top_file (string):  Q topology pathname
        fep_file (string):  fep file pathname
        runscript_file (string):  slurm/sge run script
        pdb_file (string):  pdb pathname (used to convert placeholders)
        cont_file (string):  pathname of previous qdyn5 input (continuation)
        ignore_errors (boolean):  passed to QStruct and QDynInp - write to
                                  logger instead of raising exceptions on
                                  non-critical things

    Returns:
        list of pathnames of the generated inputs (inside outdir)

    Raises:
        QGenrelaxError

    (a) Restraint coordinate can be set to:
        'top' - topology
        'cont_inp' - whatever is defined in cont_file
        'cont_final' - endpoint of previous simulation
                       (final restart of cont_file)

    (b) top_file and cont_file are mutually exclusive, one of them has to
        be provided
    """

    # check if files exist
    for path in (pdb_file, cont_file, relax_proc_file, fep_file, top_file,
                 runscript_file):
        if path and not os.path.lexists(path):
            raise QGenrelaxError("File '{}' doesn't exist.".format(path))

    if restraint not in ["top", "cont_inp", "cont_final"]:
        raise QGenrelaxError("Argument 'restraint' has to be either "
                             "'cont_inp', 'top' or 'cont_final'")

    # constants
    PREFIX = "relax_"
    DIR = os.path.join(os.getcwd(), outdir)
    if os.path.lexists(DIR):
        raise QGenrelaxError("Directory '{}' exists. Please (re)move it "
                             "or set 'outdir'.".format(DIR))

    header_comment = """\
# Generated with QTools, version {}
# Date: {}
# CWD: {}
# Cmdline: {}
""".format(__version__, time.ctime(), os.getcwd(), " ".join(sys.argv))

    # Everything is assembled in a temporary directory and copied to DIR
    # only when all inputs were generated; the try/finally makes sure the
    # temporary directory does not outlive a failure.
    TMPDIR = tempfile.mkdtemp()
    try:
        # find and replace placeholders.
        # if not PDB was given to replace them, exit
        with open(relax_proc_file, 'r') as proc:
            relax_proc_str = proc.read()
        c = find_placeholders(relax_proc_str)
        if c and not pdb_file:
            raise QGenrelaxError("Found placeholders in proc.file, but no PDB "
                                 "was given: {}".format(", ".join(c)))
        elif c:
            logger.info("These placeholders will be replaced with atom "
                        "indices: {}".format(", ".join(c)))
            try:
                qstruct = QStruct(pdb_file, "pdb", ignore_errors=ignore_errors)
                relax_proc_str = qstruct.convert_placeholders(relax_proc_str)
            except QStructError as err_msg:
                raise QGenrelaxError("Failed to replace placeholders: "
                                     "{}".format(err_msg))

        top_fn, fep_fn, re_fn, rest_fn = _stage_relax_files(
            TMPDIR, restraint, top_file, fep_file, runscript_file,
            cont_file, ignore_errors)

        general_inp, steps_inps, script_vars = _parse_relax_proc(
            relax_proc_str, relax_proc_file)

        if fep_fn is not None:
            # find and replace atom placeholders in FEP file
            # if no PDB was given to replace them, exit
            fep_tmp = os.path.join(TMPDIR, fep_fn)
            with open(fep_tmp, 'r') as fep:
                fep_file_str = fep.read()
            c = find_placeholders(fep_file_str)
            # NOTE: these two errors used to be raised as QGenfepsError,
            # which is not the exception type this function reports with.
            if c and not pdb_file:
                raise QGenrelaxError("Found placeholders in FEP file, but no "
                                     "PDB was given: {}".format(", ".join(c)))
            elif c:
                logger.info("Replacing FEP file placeholders...")
                try:
                    qstruct = QStruct(pdb_file, "pdb",
                                      ignore_errors=ignore_errors)
                    fep_file_str = qstruct.convert_placeholders(fep_file_str)
                except QStructError as err_msg:
                    raise QGenrelaxError("Failed to replace placeholders: {}"
                                         "".format(err_msg))
                with open(fep_tmp, 'w') as fep:
                    fep.write(fep_file_str)

        # join lists of lines to strings and replace the placeholders
        gen_inp_s = "\n".join(general_inp)
        for placeholder, value in script_vars.items():
            gen_inp_s = gen_inp_s.replace(placeholder, value)

        step_inps_s = []
        for step_inp in steps_inps:
            s = "\n".join(step_inp)
            for placeholder, value in script_vars.items():
                s = s.replace(placeholder, value)
            step_inps_s.append(s)

        # make and save the inputs
        steps = []
        overridden_prms_all = []
        inp_fns = []  # to store the filenames and use the return value
        for step_n, step_inp_s in enumerate(step_inps_s, 1):
            # create the files section
            files = {"final": "{}{:03d}.re".format(PREFIX, step_n),
                     "trajectory": "{}{:03d}.dcd".format(PREFIX, step_n),
                     "topology": top_fn}
            if fep_fn is not None:
                files["fep"] = fep_fn

            # every step restarts from the previous one; the first step
            # only has a restart when continuing a previous simulation
            if step_n != 1:
                files["restart"] = "{}{:03d}.re".format(PREFIX, step_n - 1)
            elif re_fn is not None:
                files["restart"] = re_fn

            if rest_fn is not None:
                files["restraint"] = rest_fn

            try:
                # parse the general input
                inp = QDynInput(gen_inp_s, ignore_errors=ignore_errors)
                # update the general parameters with step input, printout the
                # overriden parms, update the files section
                overridden_prms = inp.update(step_inp_s)
                if overridden_prms:
                    overridden_prms_all.append((step_n, ", ".join(
                        "{}:{}->{}".format(key, value_old, value_new)
                        for key, (value_old, value_new)
                        in overridden_prms.items())))

                if "energy" in inp.parameters["intervals"]:
                    files["energy"] = "{}{:03d}.en".format(PREFIX, step_n)

                inp.update(parameters={"files": files})
            except QDynInputError as err_msg:
                raise QGenrelaxError("Problem with step no. {}: {}"
                                     "".format(step_n, err_msg))

            # set the random seed
            mdp = inp.parameters["md"]
            if "random_seed" in mdp and int(mdp["random_seed"]) < 1:
                rs = random.randint(1, 1000000)
                inp.update(parameters={"md": {"random_seed": rs}})
                logger.info("Generated random seed in step {}: {}"
                            "".format(step_n, rs))

            # get the input string
            try:
                inpstr = inp.get_string()
            except QDynInputError as err_msg:
                raise QGenrelaxError("Error in step {}: {}"
                                     "".format(step_n, err_msg))

            inpfn = "{}{:03d}.inp".format(PREFIX, step_n)
            inp_fns.append(os.path.join(DIR, inpfn))
            with open(os.path.join(TMPDIR, inpfn), 'w') as inpfile:
                inpfile.write(header_comment + inpstr)

            steps.append(inp)

        try:
            shutil.copytree(TMPDIR, DIR)
        except OSError:
            raise QGenrelaxError("Cannot create directory '{}'.".format(DIR))
    finally:
        # remove temporary directory; best effort, so that a failed cleanup
        # can not mask the error that is being propagated
        shutil.rmtree(TMPDIR, ignore_errors=True)

    logger.info("Created inputs {}{:03d}.inp - {}{:03d}.inp"
                "".format(PREFIX, 1, PREFIX, len(steps)))

    # print some useful information
    if overridden_prms_all:
        logger.info("Overridden parameters:")
        for step_n, op in overridden_prms_all:
            logger.info("{}: {}".format(step_n, op))

    _log_relax_summary(steps, os.path.join(DIR, top_fn))

    return inp_fns