Beispiel #1
0
    def test_convert_placeholders(self):
        inp_string = """
$$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$
$2.CB$ $3.CA$ $LAST.ID$
"""
        out_string = """
$$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$
11 19 456
""".split()
        qstruct = QStruct("data/all_amino_acids.mol2", "mol2")
        assert qstruct.convert_placeholders(inp_string).split() == out_string

        # FAIL
        with pytest.raises(QStructError):
            qstruct.convert_placeholders("$1.CB$")
Beispiel #2
0
    def test_convert_placeholders(self):
        inp_string = """
$$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$
$2.CB$ $3.CA$ $LAST.ID$
"""
        out_string = """
$$ $AAA$ $.AA$ $AA.$ $A.A $ $ A.A$
11     21     25
""".strip()
        qstruct = QStruct("data/ace_ash_nma.pdb", "pdb")
        assert qstruct.convert_placeholders(inp_string).strip() == out_string
Beispiel #3
0
                    "--help",
                    action="help",
                    help="show this "
                    "help message and exit")

if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

args = parser.parse_args()

for k, v in six.iteritems(vars(args)):
    if k in ["inp", "pdb"] and not os.path.lexists(v):
        print("FATAL! File '{}' doesn't exist.".format(v))
        sys.exit(1)

inpstr = open(args.inp, "r").read()

try:
    qstruct = QStruct(args.pdb, "pdb", ignore_errors=args.ignore_errors)
    outstring = qstruct.convert_placeholders(inpstr)
except QStructError as e:
    print("FATAL! Exception raised: {}".format(str(e)))
    sys.exit(1)

backup = backup_file(args.out)
if backup:
    print("Backed up '{}' to '{}'".format(args.out, backup))
open(args.out, "w").write(outstring)
print("Created file '{}'...".format(args.out))
Beispiel #4
0
def genrelax(relax_proc_file,
             outdir,
             restraint,
             top_file=None,
             fep_file=None,
             runscript_file=None,
             pdb_file=None,
             cont_file=None,
             ignore_errors=False):
    """Generates inputs for an MD simulation with Q (qdyn5).

    Arguments:
        relax_proc_file (string):  genrelax procedure file pathname
        outdir (string):  output directory
        restraint (string):  restraint coordinate (a)

    Optional arguments (b)
        top_file (string):  Q topology pathname
        fep_file (string):  fep file pathname
        runscript_file (string):  slurm/sge run script
        pdb_file (string):  pdb pathname (used to convert placeholders)
        cont_file (string):  pathname of previous qdyn5 input (continuation)
        ignore_errors (boolean):  passed to QStruct and QDynInp - write to
                                  logger instead of raising exceptions on
                                  non-critical things


    (a) Restraint coordinate can be set to:
       'top' - topology
       'cont_inp' - whatever is defined in cont_file
       'cont_final' - endpoint of previous simulation
                      (final restart of cont_file)

    (b) top_file and cont_file are mutually exclusive, one of them has to
        be provided

    """

    # check if files exist
    for k, v in locals().iteritems():
        if k in [
                "pdb_file", "cont_file", "relax_proc_file", "fep_file",
                "top_file", "runscript_file", "relax_input"
        ]:
            if v and not os.path.lexists(v):
                raise QGenrelaxError("File '{}' doesn't exist.".format(v))

    if restraint not in ["top", "cont_inp", "cont_final"]:
        raise QGenrelaxError("Argument 'restraint' has to be either "
                             "'cont_inp', 'top' or 'cont_final'")

    # constants
    PREFIX = "relax_"
    DIR = os.path.join(os.getcwd(), outdir)
    if os.path.lexists(DIR):
        raise QGenrelaxError("Directory '{}' exists. Please (re)move it "
                             "or set 'outdir'.".format(DIR))

    TMPDIR = tempfile.mkdtemp()

    header_comment = """\
# Generated with QTools, version {}
# Date: {}
# CWD: {}
# Cmdline: {}
""".format(__version__, time.ctime(), os.getcwd(), " ".join(sys.argv))

    # find and replace placeholders. if not PDB was given to replace them, exit
    relax_proc_str = open(relax_proc_file, 'r').read()
    c = find_placeholders(relax_proc_str)
    if c and not pdb_file:
        raise QGenrelaxError("Found placeholders in proc.file, but no PDB "
                             "was given: {}".format(", ".join(c)))
    elif c:
        logger.info("These placeholders will be replaced with atom indices: {}"
                    "".format(", ".join(c)))
        try:
            qstruct = QStruct(pdb_file, "pdb", ignore_errors=ignore_errors)
            relax_proc_str = qstruct.convert_placeholders(relax_proc_str)
        except QStructError as err_msg:
            raise QGenrelaxError("Failed to replace placeholders: "
                                 "{}".format(err_msg))

    # get topology and fep and others from previous input if given (--cont)
    if cont_file:
        if top_file:
            raise QGenrelaxError("'top_file' and 'cont_file' don't like each "
                                 "other. Difficult to continue with a "
                                 "different topology...")
        try:
            c = QDynInput(open(cont_file, 'r').read(),
                          ignore_errors=ignore_errors)
        except QDynInputError as err_msg:
            raise QGenrelaxError("There is something wrong with the given "
                                 "input file ({}): {}".format(
                                     cont_file, err_msg))

        cont_files = c.parameters["files"]
        di = os.path.dirname(cont_file)
        top_fn = cont_files["topology"]
        cont_re_fn = cont_files["final"]
        re_fn = "cont_{}".format(cont_re_fn)
        shutil.copy2(os.path.join(di, top_fn), TMPDIR)
        shutil.copy2(os.path.join(di, cont_re_fn), os.path.join(TMPDIR, re_fn))

        if restraint == "cont_inp" and "restraint" in cont_files:
            cont_rest_fn = cont_files["restraint"]
            rest_fn = "cont_{}".format(cont_rest_fn)
        elif restraint == "cont_final":
            cont_rest_fn = cont_re_fn
            rest_fn = "cont_{}.rest".format(cont_rest_fn)
        else:
            rest_fn = None

        if rest_fn:
            shutil.copy2(os.path.join(di, cont_rest_fn),
                         os.path.join(TMPDIR, rest_fn))

        if fep_file:
            logger.warning("Using the fep file '{}', instead of the one "
                           "found in the input".format(fep_file))

            fep_fn = os.path.basename(fep_file)
            shutil.copy2(fep_file, TMPDIR)
        else:
            try:
                fep_fn = cont_files["fep"]
                shutil.copy2(os.path.join(di, fep_fn), TMPDIR)
            except KeyError:
                logger.info("No FEP file found in the input")

    # or take the arguments
    else:
        if not top_file:
            raise QGenrelaxError("Please specify the topology file or "
                                 "a previous input for a continuation run.")

        cont_files = None
        top_fn = os.path.basename(top_file)
        shutil.copy2(top_file, TMPDIR)
        try:
            fep_fn = os.path.basename(fep_file)
            shutil.copy2(fep_file, TMPDIR)
        except AttributeError:
            logger.info("NOTE: No FEP file!")

        if restraint in ["cont_inp", "cont_final"]:
            raise QGenrelaxError("Can't restrain to '{}'. Specify 'cont_file'."
                                 "".format(restraint))
        else:
            rest_fn = None

    logger.info("Restraining to: '{}'".format(rest_fn or 'topology'))

    try:
        shutil.copy2(runscript_file, TMPDIR)
    except AttributeError:
        logger.info("No submission script was given.")

    general_inp = []
    steps_inps = [
        [],
    ]
    script_vars = {}

    section = ""
    for line in relax_proc_str.split("\n"):
        # remove comments and strip whitespaces.
        line = re.split("#|\!", line)[0].strip()
        # empty lines are useless
        if line == "":
            continue
        # found a section
        if line[0] == "{":
            section = line.strip("{}").lower()
            continue

        if not section:
            raise QGenrelaxError("Failed to parse '{}'... this line - '{}' "
                                 "is not inside any section:"
                                 "".format(relax_proc_file, line))

        if section == "script_vars":
            c = line.split()
            var = c[0]
            value = " ".join(c[1:])
            script_vars[var] = value
        elif section == "general":
            general_inp.append(line)
        elif section == "steps":
            if "__________" in line:
                steps_inps.append([])
            else:
                steps_inps[-1].append(line)

    if "fep_fn" in locals():
        # find and replace atom placeholders in FEP file
        # if no PDB was given to replace them, exit
        fep_tmp = os.path.join(TMPDIR, fep_fn)
        fep_file_str = open(fep_tmp, 'r').read()
        c = find_placeholders(fep_file_str)
        if c and not pdb_file:
            raise QGenfepsError("Found placeholders in FEP file, but no "
                                "PDB was given: {}".format(", ".join(c)))
        elif c:
            logger.info("Replacing FEP file placeholders...")
            try:
                qstruct = QStruct(pdb_file, "pdb", ignore_errors=ignore_errors)
                fep_file_str = qstruct.convert_placeholders(fep_file_str)
            except QStructError as err_msg:
                raise QGenfepsError("Failed to replace placeholders: {}"
                                    "".format(err_msg))
            else:
                open(fep_tmp, 'w').write(fep_file_str)

    # check for steps with no parameters
    # (too many _________ lines)and remove them
    for i in range(len(steps_inps) - 1, -1, -1):
        if not steps_inps[i]:
            steps_inps.pop(i)

    # join lists of lines to strings and replace the placeholders
    gen_inp_s = "\n".join(general_inp)
    for placeholder, value in script_vars.iteritems():
        gen_inp_s = gen_inp_s.replace(placeholder, value)

    step_inps_s = []
    for i, step_inp in enumerate(steps_inps):
        s = "\n".join(step_inp)
        for placeholder, value in script_vars.iteritems():
            s = s.replace(placeholder, value)
        step_inps_s.append(s)

    # make and save the inputs
    steps = []
    overridden_prms_all = []
    step_n = 1
    inp_fns = []  # to store the filenames and use the return value
    for step_inp_s in step_inps_s:
        # create the files section
        final = "{}{:03d}.re".format(PREFIX, step_n)
        dcd = "{}{:03d}.dcd".format(PREFIX, step_n)
        files = {"final": final, "trajectory": dcd, "topology": top_fn}
        try:
            files["fep"] = fep_fn
        except NameError:
            pass
        if step_n != 1:
            prev_step = step_n - 1
            files["restart"] = "{}{:03d}.re".format(PREFIX, prev_step)
        elif cont_files:
            files["restart"] = re_fn

        if rest_fn != None:
            files["restraint"] = rest_fn

        try:
            # parse the general input
            inp = QDynInput(gen_inp_s, ignore_errors=ignore_errors)
            # update the general parameters with step input, printout the
            # overriden parms, update the files section
            overridden_prms = inp.update(step_inp_s)
            if overridden_prms:
                overridden_prms_all.append((step_n, ", ".join(
                    ["{}:{}->{}".format(key, value_old, value_new) \
                            for key, (value_old, value_new) in \
                            overridden_prms.iteritems()])))

            if "energy" in inp.parameters["intervals"]:
                files["energy"] = "{}{:03d}.en".format(PREFIX, step_n)

            inp.update(parameters={"files": files})

        except QDynInputError as err_msg:
            raise QGenrelaxError("Problem with step no. {}: {}"
                                 "".format(step_n, err_msg))

        # set the random seed
        mdp = inp.parameters["md"]
        if "random_seed" in mdp and int(mdp["random_seed"]) < 1:
            rs = random.randint(1, 1000000)
            inp.update(parameters={"md": {"random_seed": rs}})
            logger.info("Generated random seed in step {}: {}"
                        "".format(step_n, rs))

        # get the input string
        try:
            inpstr = inp.get_string()
        except QDynInputError as err_msg:
            raise QGenrelaxError("Error in step {}: {}"
                                 "".format(step_n, err_msg))

        inpfn = "{}{:03d}.inp".format(PREFIX, step_n)
        inp_fns.append(os.path.join(DIR, inpfn))
        s = header_comment + inpstr
        open(os.path.join(TMPDIR, inpfn), 'w').write(s)

        steps.append(inp)
        step_n += 1

    try:
        shutil.copytree(TMPDIR, DIR)
    except OSError:
        raise QGenrelaxError("Cannot create directory '{}'.".format(DIR))

    # remove temporary directory
    shutil.rmtree(TMPDIR)
    logger.info("Created inputs {}{:03d}.inp - {}{:03d}.inp"
                "".format(PREFIX, 1, PREFIX, len(steps)))

    # print some useful information
    if overridden_prms_all:
        logger.info("Overridden parameters:")
        for step_n, op in overridden_prms_all:
            logger.info("{}: {}".format(step_n, op))

    summary = """
Quick summary
{0:<10} {1:>5} {2:>10} {3:>10} {4:^10} {5:^10} {6:^10} {7:^30} {8:^10} {9:>10}
""".format("Step", "T", "Stepsize", "Steps", "Seq.rest", "Dist.rest",
           "Ang.rest", "Shake", "Rand.Seed", "Data (MB)")
    locale.setlocale(locale.LC_ALL, '')
    restraints = []
    total_time = 0

    # print out how much data this run will produce
    # for this we need the atom count from the topology
    for line in open(os.path.join(DIR, top_fn), 'r').readlines(1024):
        if "no. of atoms, no. of solute atoms" in line:
            num_atoms_all = int(line.strip().split()[0])
            break

    REST_B_PER_ATOM = 48.0
    TRJ_B_PER_ATOM = 12.0
    # very rough estimate, depends on Q version
    # it can double if group_contributions are calculated
    EN_B_PER_STEP = 370.0
    CONV_MB = 2**20
    # very rough estimate
    OUT_B_PER_STEP = 2000
    TEMP_B_PER_STEP = 160
    NB_B_PER_STEP = 80

    # intervals mapping: q_parameter_key, q_default_value, approx_bytes_per_frame
    qintervals = {
        "trj": ["trajectory", 100, num_atoms_all * TRJ_B_PER_ATOM],
        "log": ["output", 10, OUT_B_PER_STEP],
        "temp": ["temperature", 10, TEMP_B_PER_STEP],
        "en": ["energy", 10, EN_B_PER_STEP],
        "nb": ["non_bond", 10, NB_B_PER_STEP]
    }

    total_data = {"trj": 0, "log": 0, "en": 0, "rest": 0}

    for i, step in enumerate(steps):
        nstep = i + 1
        try:

            # get md parameters
            mdparms = step.parameters["md"]
            total_time += float(mdparms["stepsize"]) * int(mdparms["steps"])
            random_seed = mdparms.get("random_seed", "")

            # get restraints
            step_rests = {
                "sequence_restraints": [],
                "distance_restraints": [],
                "angle_restraints": []
            }
            for rest_type in step_rests.keys():
                for seqrest in step.parameters.get(rest_type, []):
                    if seqrest in restraints:
                        rest_num = restraints.index(seqrest) + 1
                        step_rests[rest_type].append(str(rest_num))
                    else:
                        restraints.append(seqrest)
                        step_rests[rest_type].append(str(len(restraints)))
            seq = ",".join(step_rests["sequence_restraints"])
            dist = ",".join(step_rests["distance_restraints"])
            angle = ",".join(step_rests["angle_restraints"])

            # get shake parameters
            shake = []
            # this is a Q default, hopefully it will not change
            if mdparms.get("shake_solvent", "on") == "on":
                shake.append("solvent")
            if mdparms.get("shake_hydrogens", "off") == "on":
                shake.append("hydrogens")
            if mdparms.get("shake_solute", "off") == "on":
                shake.append("solute")
            shake = ",".join(shake)

            # calculate approx amount of data
            data = {}
            mdsteps = int(mdparms["steps"])
            for k, v in qintervals.iteritems():
                interval_key = v[0]
                default_interval = v[1]
                bytes_per_step = v[2]
                try:
                    interval = int(step.parameters["intervals"][interval_key])
                    data[k] = mdsteps / interval * bytes_per_step
                except KeyError:
                    # default
                    data[k] = mdsteps / default_interval * bytes_per_step
                except ZeroDivisionError:
                    data[k] = 0  # no printout
                finally:
                    # if energy or trajectory, check that files for output are
                    # defined, otherwise set the printout to 0
                    if interval_key in ("energy", "trajectory") and not \
                            interval_key in step.parameters["files"].keys():
                        data[k] = 0

            trj_data = data["trj"]
            en_data = data["en"]
            log_data = (data["log"] + data["temp"] + data["nb"])
            rest_data = num_atoms_all * REST_B_PER_ATOM
            total_data["trj"] += trj_data
            total_data["log"] += log_data
            total_data["en"] += en_data
            total_data["rest"] += rest_data

            data = (trj_data + log_data + rest_data + en_data) / CONV_MB

            summary += "{:<10} {:>5} {:>10} {:>10} {:^10} {:^10} {:^10} "\
                       "{:^30} {:^10} {:>10.2f}\n" \
                       "".format(nstep, mdparms["temperature"],
                                 mdparms["stepsize"],
                                 locale.format('%d', mdsteps, 1),
                                 seq, dist, angle, shake, random_seed, data)

        except KeyError as err_msg:
            raise QGenrelaxError("You are missing either 'steps', "
                                 "'temperature' or 'stepsize' in one of your "
                                 "relaxation steps. These parameters are "
                                 "quite important you know...")

    summary += "Restraints:\n"
    for i, rest in enumerate(restraints):
        summary += "{}: {}\n".format(i + 1, rest)

    summary += """
Total time: {} ps
Total wasted storage (wild approximation): \
{:.2f} MB (trj: {:.1f}, log: {:.1f}, en: {:.1f}, rest: {:.1f})
""".format(total_time / 1000.0,
           sum(total_data.values()) / CONV_MB, total_data["trj"] / CONV_MB,
           total_data["log"] / CONV_MB, total_data["en"] / CONV_MB,
           total_data["rest"] / CONV_MB)

    for l in summary.split("\n"):
        logger.info(l)

    return inp_fns
Beispiel #5
0
def genfeps(fep_proc_file,
            relax_input_file,
            restraint,
            energy_list_fn,
            frames,
            repeats,
            fromlambda,
            prefix,
            first_frame_eq,
            pdb_file=None,
            fep_file=None,
            runscript_file=None,
            ignore_errors=False):
    """Generates inputs for a FEP/MD simulation with Q (qdyn5).

    Arguments:
        fep_proc_file (string):  genfeps procedure file pathname
        relax_input_file (string):  pathname of last relaxation step input
        restraint (string):  restraint coordinate (a)
        energy_list_fn (string):  name of file that will contain the en.f.list
        frames (int):  number of FEP frames
        repeats (int):  number of repeats/replicas
        fromlambda (float):  starting lambda (0.0 - 1.0)
        prefix (string):  prefix for repeat directories
        first_frame_eq (boolean):  use equil instead of first frame (cadee)

    Optional arguments:
        pdb_file (string):  pdb pathname (used to convert placeholders)
        fep_file (string):  alternate fep file pathname (ignoring input's fep)
        runscript_file (string):  slurm/sge run script
        ignore_errors (boolean):  passed to QStruct and QDynInp - write to
                                  logger instead of raising exceptions on
                                  non-critical things

    Returns:
        rep_dirs (list):  list of created replica folders


    (a) Restraint coordinate can be set to:
       'inp' - whatever is defined in relax_input_file
       'top' - topology
       'relax' - endpoint of relaxation

    """

    frames = int(frames)
    repeats = int(repeats)
    if fromlambda != None:
        fromlambda = float(fromlambda)

    # constants
    PREFIX_EQ = "equil_"
    PREFIX_FEP = "fep_"

    # check if files exist
    for k, v in locals().iteritems():
        if k in [
                "pdb_file", "fep_proc_file", "fep_file", "runscript_file",
                "relax_input_file"
        ]:
            if v and not os.path.lexists(v):
                raise QGenfepsError("File '{}' doesn't exist.".format(v))

    if restraint not in ["top", "relax", "inp"]:
        raise QGenfepsError("Argument 'restraint' has to be either "
                            "'inp', 'top' or 'relax'")

    # find and replace atom placeholders.
    # if no PDB was given to replace them, exit
    fep_proc_str = open(fep_proc_file, 'r').read()
    c = find_placeholders(fep_proc_str)
    if c and not pdb_file:
        raise QGenfepsError("Found placeholders in proc. file, but no PDB "
                            "was given: {}".format(", ".join(c)))
    elif c:
        logger.info("These placeholders will be replaced with atom indices: " +
                    ", ".join(c))
        try:
            qstruct = QStruct(pdb_file, "pdb", ignore_errors=ignore_errors)
            fep_proc_str = qstruct.convert_placeholders(fep_proc_str)
        except QStructError as err_msg:
            raise QGenfepsError("Failed to replace placeholders: {}"
                                "".format(err_msg))

    # make a nice header comment in each input file with the
    header_comment = """\
# Generated with QTools, version {}
# Date: {}
# CWD: {}
# Cmdline: {}
""".format(__version__, time.ctime(), os.getcwd(), " ".join(sys.argv))

    # get topology and fep and others from the last relaxation input
    top_file_abs, fep_file_abs, re_file_abs, rest_file = None, None, None, None
    lambda_initial = None
    try:
        c = QDynInput(open(relax_input_file, 'r').read(),
                      ignore_errors=ignore_errors)
    except QDynInputError as err_msg:
        raise QGenfepsError("There is something wrong with the given input "
                            "file ({}): {}".format(relax_input_file, err_msg))

    di = os.path.dirname(relax_input_file)
    try:
        files = c.parameters["files"]
        lambda_initial = float(c.parameters["lambdas"].split()[0])
        top_file_abs = os.path.join(di, files["topology"])
        re_file_abs = os.path.join(di, files["final"])
        fep_file_abs = os.path.join(di, files["fep"])
        if "restraint" in files:
            rest_file = os.path.join(di, files["restraint"])
    except KeyError as err_msg:
        raise QGenfepsError("Parsing the relaxation input file failed, "
                            "keyword missing... {}".format(err_msg))

    # check if the files actually exist
    for fn, descr in [(top_file_abs, "topology"), (fep_file_abs, "fep"),
                      (re_file_abs, "final"), (rest_file, "restraint")]:
        if fn and not os.path.lexists(fn):
            raise QGenfepsError("When parsing the input, found this filename "
                                "'{}' next to the '{}' command. Unfortunately,"
                                " the file doesnt exist...".format(fn, descr))

    # change the FEP (when debugging your system, you might want to
    # use an old relax and not waste 100M core hours when changing
    # a soft core value in the fep)
    if fep_file:
        fep_file_abs = os.path.abspath(fep_file)

    # find and replace atom placeholders in FEP file
    # if no PDB was given to replace them, exit
    fep_file_str = open(fep_file_abs, 'r').read()
    c = find_placeholders(fep_file_str)
    if c and not pdb_file:
        raise QGenfepsError("Found placeholders in FEP file, but no PDB was "
                            "given: {}".format(", ".join(c)))
    elif c:
        logger.info("Replacing FEP file placeholders...")
        try:
            qstruct = QStruct(pdb_file, "pdb", ignore_errors=ignore_errors)
            fep_file_str = qstruct.convert_placeholders(fep_file_str)
        except QStructError as err_msg:
            raise QGenfepsError("Failed to replace placeholders: {}"
                                "".format(err_msg))

    # change the inital lambda (this is not recommended, the system should
    # be properly relaxed at a particual lambda before doing FEP)
    if fromlambda != None:
        lambda_initial = float(fromlambda)
        if lambda_initial > 1.0 or lambda_initial < 0.0:
            raise QGenfepsError("Lambda value is bogus, are you on drugs?")

    # create lambda values, find the closest to the starting one and
    # rearrange accordingly: [0.0, 0.02, 0.04, ... 0.98, 1.0]  for frames==51
    lambdas = [float(num) / (frames - 1) for num in xrange(0, frames)]

    # [2,]   for lambda_initial == 0.04 (or close to 0.04) and frames==51
    l_i = [i for i in xrange(0, frames) if \
            abs(lambdas[i] - lambda_initial) <= (1.0 / frames)]
    # there should be only one
    l_i = l_i[0]
    lambda_initial = lambdas[l_i]

    # [0.02, 0.0,] for the case of lambda_initial == 0.04 and frames == 51
    forward_lambdas = list(reversed(lambdas[0:l_i]))
    # [0.06, 0.08, ..., 1.0] for the case of lambda_initial == 0.04, fr. == 51
    backward_lambdas = lambdas[l_i + 1:]

    lambdas = [
        lambda_initial,
    ] + forward_lambdas + backward_lambdas

    # print out some useful information
    logger.info("Using restart file: {}"
                "".format(os.path.relpath(re_file_abs)))
    logger.info("Using topology file: {}"
                "".format(os.path.relpath(top_file_abs)))
    logger.info("Using FEP file: {}" "".format(os.path.relpath(fep_file_abs)))
    logger.info("Starting from lambda value (state 1): {}"
                "".format(lambda_initial))
    logger.info("Number of FEP frames: {} ".format(frames))

    # create a temporary directory to store the files that are identical
    # in all replicas - top, fep, runscript, relax restart, restraint file
    # (if any) and copy the common files
    TMPDIR = tempfile.mkdtemp()
    top_fn = os.path.basename(top_file_abs)
    fep_fn = os.path.basename(fep_file_abs)
    relax_re_fn = "cont_" + os.path.basename(re_file_abs)
    shutil.copy2(top_file_abs, TMPDIR)
    shutil.copy2(re_file_abs, os.path.join(TMPDIR, relax_re_fn))

    open(os.path.join(TMPDIR, fep_fn), "w").write(fep_file_str)
    if runscript_file:
        shutil.copy2(runscript_file, TMPDIR)
    else:
        logger.info("No Q runscript given.")

    # handle the whole restraint coordinates crap...
    # rest_file is the file from the relaxation input (if any)
    # rest_fn is the basename of the restraints file (either from input
    # or relaxed.re.rest), or None if rest. to topology
    if restraint == "relax":
        logger.info("Restraining to: relaxation")
        rest_fn = "cont_" + os.path.basename(re_file_abs) + ".rest"
        shutil.copy2(re_file_abs, os.path.join(TMPDIR, rest_fn))
    elif restraint == "top":
        logger.info("Restraining to: topology")
        rest_fn = None
    else:  # default, from input
        if rest_file:
            logger.info("Restraining to: {} (from input)"
                        "".format(os.path.relpath(rest_file)))
            rest_fn = "cont_" + os.path.basename(rest_file)
            shutil.copy2(rest_file, os.path.join(TMPDIR, rest_fn))
        else:
            logger.info("Restraining to: topology (from input)")
            rest_fn = None

    # parse the proc file
    general_inp = []
    eq_steps_inps = [
        [],
    ]
    fep_inp = []
    script_vars = {}

    section = ""
    for line in fep_proc_str.split("\n"):
        # remove comments and strip whitespaces.
        line = re.split("#|\!", line)[0].strip()
        # empty lines are useless
        if line == "":
            continue
        # found a section
        if line[0] == "{":
            section = line.strip("{}").lower()
            continue

        if not section:
            raise QGenfepsError("Parsing the procedure file failed... This "
                                "line: '{}' is not inside any section:"
                                "".format(line))

        if section == "script_vars":
            c = line.split()
            var, value = c[0], " ".join(c[1:])
            script_vars[var] = value
        elif section == "general":
            general_inp.append(line)
        elif section == "steps_equil":
            if "__________" in line:
                eq_steps_inps.append([])
            else:
                eq_steps_inps[-1].append(line)
        elif section == "fep":
            fep_inp.append(line)
        else:
            raise QGenfepsError("Parsing the procedure file failed: "
                                "Unsupported section: '{}'".format(section))

    # check for steps with no parameters (too many _________ lines)
    # and remove them
    for i in range(len(eq_steps_inps) - 1, -1, -1):
        if not eq_steps_inps[i]:
            eq_steps_inps.pop(i)

    # check for missing sections
    for l, n in ((general_inp, "GENERAL"), (eq_steps_inps, "STEPS_EQUIL"),
                 (fep_inp, "FEP")):
        if not l:
            raise QGenfepsError("Parsing the procedure file failed: "
                                "Section '{}' is missing".format(n))

    # join lists of lines to strings and replace the placeholders
    script_variables = sorted(script_vars.items(), reverse=True)
    gen_inp_s = "\n".join(general_inp)
    fep_inp_s = "\n".join(fep_inp)
    eq_steps_inps_s = ["\n".join(eq_s_inp) for eq_s_inp in eq_steps_inps]

    for placeholder, value in script_variables:
        gen_inp_s = gen_inp_s.replace(placeholder, value)
        fep_inp_s = fep_inp_s.replace(placeholder, value)
        for step_i, eq_step_inp_s in enumerate(eq_steps_inps_s):
            eq_steps_inps_s[step_i] = eq_step_inp_s.replace(placeholder, value)

    ####################
    # make equil. inputs
    eq_steps = []
    for step_n, eq_step_inp_s in enumerate(eq_steps_inps_s):
        # create the files section
        final = "{}{:03d}_{:4.3f}.re".format(PREFIX_EQ, step_n, lambda_initial)
        dcd = "{}{:03d}_{:4.3f}.dcd".format(PREFIX_EQ, step_n, lambda_initial)
        files = {
            "final": final,
            "trajectory": dcd,
            "topology": top_fn,
            "fep": fep_fn
        }

        if first_frame_eq:
            files["energy"] = "{}{:03d}_{:4.3f}.en".format(
                PREFIX_EQ, step_n, lambda_initial)

        if rest_fn:
            files["restraint"] = rest_fn

        if step_n != 0:
            files["restart"] = "{}{:03d}_{:4.3f}.re".format(
                PREFIX_EQ, step_n - 1, lambda_initial)
        else:
            files["restart"] = relax_re_fn

        # parse the general input and update with step input and files section
        try:
            inp = QDynInput(gen_inp_s, ignore_errors=ignore_errors)
            inp.update(eq_step_inp_s)
            if "energy" in inp.parameters["intervals"]:
                files["energy"] = "{}{:03d}_{:4.3f}.en".format(
                    PREFIX_EQ, step_n, lambda_initial)
            elif first_frame_eq:
                raise QGenfepsError("Argument 'first_frame_eq' requires the "
                                    "energy printout defined in the intervals "
                                    "section of the equilibration "
                                    "(e.g. 'energy   10')")

            inp.update(parameters={"files": files})
            inp.update(
                parameters={
                    "lambdas":
                    "{:9.7f} {:9.7f}"
                    "".format(lambda_initial, 1 - lambda_initial)
                })
        except QDynInputError as err_msg:
            raise QGenfepsError("Problem with equil. step no. {}: {}"
                                "".format(step_n, err_msg))

        # test the input string
        try:
            _ = inp.get_string()
        except QDynInputError as err_msg:
            raise QGenfepsError("Error in equil. step {}: {}"
                                "".format(step_n, err_msg))

        # check if random seed is not defined or is fixed in the first step
        if step_n == 0:
            if repeats > 1:
                if ("random_seed" not in inp.parameters["md"]) or \
                        (int(inp.parameters["md"]["random_seed"]) > 0):
                    raise QGenfepsError("Fixed random seed (or restart "
                                        "velocities) works only with one "
                                        "repeat (others will be identical).\n"
                                        "Please use 'random_seed   -1' in "
                                        "your first equilibration step to "
                                        "generate random random seeds.")

            elif "random_seed" not in inp.parameters["md"]:
                logger.info("No random seed in first step of equilibration,"
                            "using restart velocities.")

                if (not rest_file and rest_fn) or (not rest_fn and rest_file) \
                  or (rest_file and (os.path.basename(rest_file) != rest_fn)):
                    logger.warning("This will not be a true continuation run! "
                                   "The relaxation restraint does not match "
                                   "yours. Use 'inp' instead of 'top' or "
                                   "'relax' for the restraint.")

        # append the input
        eq_steps.append(inp)

    #################
    # make FEP inputs
    en_filenames = []
    feps = []

    for step_n, lam in enumerate(lambdas):
        # create the files section
        final = "{}{:03d}_{:4.3f}.re".format(PREFIX_FEP, step_n, lam)
        dcd = "{}{:03d}_{:4.3f}.dcd".format(PREFIX_FEP, step_n, lam)
        en = "{}{:03d}_{:4.3f}.en".format(PREFIX_FEP, step_n, lam)
        files = {
            "final": final,
            "trajectory": dcd,
            "topology": top_fn,
            "energy": en,
            "fep": fep_fn
        }

        # if this step is in new direction (backwards) then
        # set the previous lambda and step to initial
        if backward_lambdas and lam == backward_lambdas[0]:
            prev_fep = feps[0]
        elif step_n == 0:
            prev_fep = eq_steps[-1]
        else:
            prev_fep = feps[-1]

        # if this flag is set, all steps that point to the first step
        # should point to the last eq step
        if first_frame_eq:
            if step_n == 1 or (backward_lambdas
                               and lam == backward_lambdas[0]):
                prev_fep = eq_steps[-1]

        if rest_fn:
            files["restraint"] = rest_fn

        files["restart"] = prev_fep.parameters["files"]["final"]

        # update the parameters and check the input
        try:
            inp = QDynInput(gen_inp_s, ignore_errors=ignore_errors)
            inp.update(fep_inp_s)
            if "energy" not in inp.parameters["intervals"]:
                raise QGenfepsError("FEP stage requires the energy printout "
                                    "defined in the intervals section "
                                    "(e.g. 'energy   10')")

            inp.update(parameters={"files": files})
            inp.update(parameters={
                "lambdas": "{:9.7f} {:9.7f}"
                "".format(lam, 1 - lam)
            })
            inp.check()
        except QDynInputError as err_msg:
            raise QGenfepsError("Error in FEP step {}: {}"
                                "".format(step_n, err_msg))

        # append the input
        feps.append(inp)

        # add the energy filename to the list
        en_filenames.append(inp.parameters["files"]["energy"])

    # if first_frame_eq is set add the energy file and remove the
    # first fep frame
    if first_frame_eq:
        logger.info("Replacing the first FEP frame with the last "
                    "equilibration step")
        en_filenames[0] = eq_steps[-1].parameters["files"]["energy"]
        feps.pop(0)

    # check random seed in fep
    if "random_seed" in feps[0].parameters["md"] and \
            int(feps[0].parameters["md"]["random_seed"]) < 1:
        logger.warning("Generating random seeds in FEP inputs. "
                       "Are you sure this is ok?")

    # write a file that contains the names of all energy files in proper order
    # this file is used later by q_mapper.py
    # sort the enfiles according to lambda (1.0 -> 0.0) so that the mapping
    # will always go from reactants to products
    enfiles_lambdas = sorted([(enf.split("_")[-1], i) for i, enf in \
            enumerate(en_filenames)], reverse=True)
    en_filenames_sorted = [en_filenames[i] for l, i in enfiles_lambdas]
    enf = os.path.join(TMPDIR, energy_list_fn)
    open(enf, 'w').write("\n".join(en_filenames_sorted))

    # create directories for repeats/replicas (rep_000,rep_001,rep_002...)
    # copy everything from TMPDIR (topology, fep file, relax restart and
    # restraint file (if any)); create the eq and fep inputs
    #
    # first check for existing directories
    for num in xrange(0, repeats):
        rep = "{}{:03d}".format(prefix, num)
        if os.path.lexists(rep):
            raise QGenfepsError("Directory '{}' exists. Please (re)move it or "
                                "change the prefix with --prefix.".format(rep))

    lsdir = os.listdir(TMPDIR)
    rep_dirs = []
    for num in xrange(0, repeats):
        rep = "{}{:03d}".format(prefix, num)
        os.mkdir(rep)
        # copy stuff from TMPDIR
        for f in lsdir:
            shutil.copy2(os.path.join(TMPDIR, f), rep)

        # create eq inputs
        for step_n, eq_step in enumerate(eq_steps):

            # check if random seed is a fixed value or not (generate random or fail)
            eqs = copy.deepcopy(eq_step)  # a copy
            if "random_seed" in eqs.parameters["md"] and \
                    int(eqs.parameters["md"]["random_seed"]) < 1:
                rs = random.randint(1, 1e6)
                eqs.update(parameters={"md": {"random_seed": rs}})

            try:
                s = eqs.get_string()
            except QDynInputError as err_msg:
                raise QGenfepsError("Error in step {}: {}"
                                    "".format(step_n, err_msg))
            fn = os.path.join(
                rep, "{}{:03d}_{:4.3f}.inp"
                "".format(PREFIX_EQ, step_n, lambda_initial))
            s = header_comment + s
            open(fn, 'w').write(s)

        last_eq_fn = fn
        # create FEP inputs
        for step_n, fep in enumerate(feps):
            if first_frame_eq:
                step_n += 1

            fs = copy.deepcopy(fep)  # a copy
            if "random_seed" in fs.parameters["md"] and \
                    int(fs.parameters["md"]["random_seed"]) < 1:
                rs = random.randint(1, 1e6)
                fs.update(parameters={"md": {"random_seed": rs}})

            try:
                s = fs.get_string()
            except QDynInputError as err_msg:
                raise QGenfepsError("Error in step {}: {}"
                                    "".format(step_n, err_msg))
            lam = lambdas[step_n]  # feps was created in lambdas iteration
            fn = os.path.join(
                rep, "{}{:03d}_{:4.3f}.inp"
                "".format(PREFIX_FEP, step_n, lam))
            s = header_comment + s
            open(fn, 'w').write(s)

        logger.info("Created inputs for repeat/replica '{}'.".format(rep))
        rep_dirs.append(rep)

    # get the amount of storage that will be wasted
    # for this we need the atom count from the topology
    for line in open(os.path.join(TMPDIR, top_fn), 'r').readlines(1024):
        if "no. of atoms, no. of solute atoms" in line:
            num_atoms_all = int(line.split()[0])
            break

    REST_B_PER_ATOM = 48.0
    TRJ_B_PER_ATOM = 12.0
    # very rough estimate, depends on Q version
    # it can double if group_contributions are calculated
    EN_B_PER_STEP = 370.0
    CONV_MB = 2**20
    # very rough estimate
    OUT_B_PER_STEP = 2000
    TEMP_B_PER_STEP = 160
    NB_B_PER_STEP = 80

    # intervals maps: q_parameter_key, q_default_value, approx_bytes_per_frame
    qintervals = {
        "trj": ["trajectory", 100, num_atoms_all * TRJ_B_PER_ATOM],
        "log": ["output", 10, OUT_B_PER_STEP],
        "temp": ["temperature", 10, TEMP_B_PER_STEP],
        "en": ["energy", 10, EN_B_PER_STEP],
        "nb": ["non_bond", 10, NB_B_PER_STEP]
    }
    total_data = {"trj": 0, "log": 0, "en": 0, "rest": 0}
    # calculate approx amount of data
    for i, step in enumerate(eq_steps + feps):
        data = {}
        mdsteps = int(step.parameters["md"]["steps"])
        for k, v in qintervals.iteritems():
            interval_key = v[0]
            default_interval = v[1]
            bytes_per_step = v[2]
            try:
                interval = int(step.parameters["intervals"][interval_key])
                data[k] = mdsteps / interval * bytes_per_step
            except KeyError:
                # default
                data[k] = mdsteps / default_interval * bytes_per_step
            except ZeroDivisionError:
                data[k] = 0  # no printout
            finally:
                # if energy or trajectory, check that files for output are
                # defined, otherwise set the printout to 0
                if interval_key in ("energy", "trajectory") and not \
                   interval_key in step.parameters["files"].keys():
                    data[k] = 0

        trj_data = data["trj"]
        en_data = data["en"]
        log_data = (data["log"] + data["temp"] + data["nb"])
        rest_data = num_atoms_all * REST_B_PER_ATOM
        total_data["trj"] += trj_data
        total_data["log"] += log_data
        total_data["en"] += en_data
        total_data["rest"] += rest_data

        data = (trj_data + log_data + rest_data + en_data) / CONV_MB

    logger.info("Your runs will waste approx. {:.2f} MB of storage. "
                "Per replica: {:.2f} MB (trj: {:.1f}, log: {:.1f}, "
                "en: {:.1f}, rest: {:.1f})".format(
                    sum(total_data.values()) / CONV_MB * repeats,
                    sum(total_data.values()) / CONV_MB,
                    total_data["trj"] / CONV_MB, total_data["log"] / CONV_MB,
                    total_data["en"] / CONV_MB, total_data["rest"] / CONV_MB))

    # remove temporary directory
    shutil.rmtree(TMPDIR)

    return rep_dirs
Beispiel #6
0
 def test_convert_placeholders_fail(self):
     qstruct = QStruct("data/ace_ash_nma.pdb", "pdb")
     with pytest.raises(QStructError):
         qstruct.convert_placeholders("$1.CB$")