Ejemplo n.º 1
0
    def __init__(self, part2_string):
        """Keep the raw Part2 text and parse it into ``self.data``.

        Args:
            part2_string (string):  text handed to ``self._parse()``

        Raises:
            QFepOutputError: if ``self.data`` has no rows after parsing
        """
        self._part2_string = part2_string
        self.data = DataContainer(self._COLUMN_TITLES)
        self._parse()

        # an empty container after parsing is treated as an error
        parsed_rows = self.data.get_rows()
        if not parsed_rows:
            raise QFepOutputError("Part2 is empty (no rows).")
Ejemplo n.º 2
0
    def dg_all(self):
        """Build a DataContainer with all main and subcalc free energies.

        One row is added for every entry of ``self.qfos`` (in sorted order).
        A row starts with the relative path of the entry followed by its
        dG*, dG0 and dG_lambda values, and continues with the same three
        values for each subcalculation (sorted by name). Values that are
        missing from a lookup are stored as None.
        """
        sc_names = sorted(self.sub_calcs.keys())

        titles = ["Qfep_output", "dG*", "dG0", "dG_lambda"]
        for name in sc_names:
            titles += [
                "{}_dG*".format(name),
                "{}_dG0".format(name),
                "{}_dG_lambda".format(name),
            ]

        container = DataContainer(titles)
        for qfo in sorted(self.qfos):
            # main calculation values first
            row = [
                os.path.relpath(qfo),
                self.dgas.get(qfo, None),
                self.dg0s.get(qfo, None),
                self.dgs_fep.get(qfo, None),
            ]
            # then three values per subcalculation, same order as the titles
            for name in sc_names:
                row.append(self.sub_calcs[name].dgas.get(qfo, None))
                row.append(self.sub_calcs[name].dg0s.get(qfo, None))
                row.append(self.sub_calcs[name].dgs_fep.get(qfo, None))
            container.add_row(row)
        return container
Ejemplo n.º 3
0
class _QFepPart1(object):
    """Class for parsing and storing data from Part1 in Qfep output.

    Part1 contains free energies vs. lambda (FEP).

    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    Usage:
    part1 = _QFepPart1(part1_string)
    cols=["Lambda", "dG"]
    dG_lambda = part1.data.get_rows(columns=cols)


    Args:
        part1_string (string):  string of Part1 in qfep output

    """

    _PART1_HEADER = "# lambda(1)      dGf sum(dGf)      dGr sum(dGr)     <dG>"
    _COLUMN_TITLES = ["Lambda", "dGf", "sum_dGf", "dGr", "sum_dGr", "dG"]

    def __init__(self, part1_string):
        self._part1_string = part1_string
        self.data = DataContainer(self._COLUMN_TITLES)

        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part1 is empty (no rows).")

    @property
    def dg(self):
        """Return final dG(lambda)   (FEP)"""
        # last value of the "dG" column
        return self.data.get_columns(["dG"])[0][-1]

    def _parse(self):
        """Fill self.data with the numeric rows of the Part1 string.

        Raises:
            QFepOutputError: if the column header is missing or different
                             from _PART1_HEADER, or if a data line contains
                             a token that can not be converted to float
        """
        lines = self._part1_string.split('\n')
        # the first line is a comment
        lines.pop(0)

        ## In newer versions of Q, two additional lines are printed
        # to distinguish between 'full', 'exclusions' and 'qcp'
        # check for the two extra lines and remove them
        # (length is checked first so that short input can not raise
        # IndexError)
        if len(lines) > 1 and "Calculation" in lines[1]:
            lines = lines[2:]

        # comment with column names; a missing header is reported the same
        # way as a wrong one instead of leaking an IndexError
        if not lines:
            raise QFepOutputError("Part1 has a wrong header, did the qfep5 "
                                  "binary change?")
        header = lines.pop(0).strip()
        if header != self._PART1_HEADER:
            raise QFepOutputError("Part1 has a wrong header, did the qfep5 "
                                  "binary change?")
        for line in lines:
            # drop everything after the first '#' or '!'
            # (raw string: '\!' is not a valid escape in a normal literal)
            line = re.split(r"#|\!", line)[0].strip()
            if not line:
                continue
            try:
                row = [float(x) for x in line.split()]
            except ValueError:
                raise QFepOutputError("Part1 contains a value that is not "
                                      "a number: '{}'".format(line))
            self.data.add_row(row)
Ejemplo n.º 4
0
    def lra_stats(self):
        """Calculate average and st.dev of LRA and reorg energies.

        Rows that share the same position in each DataContainer stored in
        self.lras are grouped together. For every group, columns 1-4 are
        reduced with np.mean and np.std, and column 0 of the first row is
        used as the energy type label.

        Returns:
            DataContainer with columns "E_type", "(E2-E1)_10_mean",
            "(E2-E1)_10_std", "(E2-E1)_01_mean", "(E2-E1)_01_std",
            "LRA_mean", "LRA_std", "REORG_mean", "REORG_std"
        """

        average_lras = DataContainer([
            "E_type", "(E2-E1)_10_mean", "(E2-E1)_10_std", "(E2-E1)_01_mean",
            "(E2-E1)_01_std", "LRA_mean", "LRA_std", "REORG_mean", "REORG_std"
        ])

        # allvals[i] collects the i-th row of every container in self.lras
        allvals = []
        for lra in self.lras.values():
            for irow, row in enumerate(lra.get_rows()):
                if irow == len(allvals):
                    # first time this row index is seen
                    allvals.append([])
                allvals[irow].append(row)

        # allvals now looks like this:
        # [
        #   [
        #     ["EQtot", EQtot_de_st1_1, EQtot_de_st2_1, EQtot_lra_1, EQtot_reorg_1],
        #     ["EQtot", EQtot_de_st1_2, EQtot_de_st2_2, ...], ...
        #   ],
        #   [
        #     ["EQbond", EQbond_de_st1_1, EQbond_de_st2_1, EQbond_lra_1, EQbond_reorg_1],
        #     ["EQbond", EQbond_de_st1_2, EQbond_de_st2_2, ...], ...
        #   ]
        # ]
        #
        for values in allvals:
            # transpose to get [ ["EQtot","EQtot"...],
            #                    [ EQtot_de_st1_1, EQtot_de_st1_2,...],
            #                    [ EQtot_de_st2_1, EQtot_de_st2_2,...], ...]
            #
            # list() is required because zip() returns an iterator on
            # Python 3, which can not be indexed
            values = list(zip(*values))
            # now they can be easily averaged and std-ed
            e_type = values[0][0]
            de_st1_mean = np.mean(values[1])
            de_st2_mean = np.mean(values[2])
            lra_mean = np.mean(values[3])
            reo_mean = np.mean(values[4])
            de_st1_std = np.std(values[1])
            de_st2_std = np.std(values[2])
            lra_std = np.std(values[3])
            reo_std = np.std(values[4])

            average_lras.add_row([
                e_type, de_st1_mean, de_st1_std, de_st2_mean, de_st2_std,
                lra_mean, lra_std, reo_mean, reo_std
            ])

        return average_lras
Ejemplo n.º 5
0
    def __init__(self, part3_string):
        """Keep the raw Part3 text, reset cached values and parse the text.

        Args:
            part3_string (string):  text handed to ``self._parse()``

        Raises:
            QFepOutputError: if ``self.data`` has no rows after parsing
        """
        self._part3_string = part3_string
        self.data = DataContainer(self._COLUMN_TITLES)

        # all of these start out unset
        self._dga = self._dg0 = None
        self._maxima_bins = self._minima_bins = None
        self.warning = None

        self._parse()

        parsed_rows = self.data.get_rows()
        if not parsed_rows:
            raise QFepOutputError("Part3 is empty (no rows).")
Ejemplo n.º 6
0
class _QFepPart2(object):
    """Class for parsing and storing data from Part2 in Qfep output.

    Part2 contains the results of the binning process - EVB ground state
    free energy vs. lambda and Egap, as well as the diabatic free energy
    profiles.

    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    Usage:
    part2 = _QFepPart2(part2_string)
    cols=["Lambda", "dGg"]
    dGg_lambda = part2.data.get_rows(columns=cols)


    Args:
        part2_string (string):  string of Part2 in qfep output

    """

    _PART2_HEADER = "# Lambda(1)  bin Energy gap      dGa     dGb     dGg    "\
                    "# pts    c1**2    c2**2"

    _COLUMN_TITLES = [
        "Lambda", "bin", "Egap", "dGa", "dGb", "dGg", "points", "c1**2",
        "c2**2"
    ]

    def __init__(self, part2_string):
        self._part2_string = part2_string
        self.data = DataContainer(self._COLUMN_TITLES)

        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part2 is empty (no rows).")

    def _parse(self):
        """Fill self.data with the numeric rows of the Part2 string.

        Raises:
            QFepOutputError: if the column header is missing or different
                             from _PART2_HEADER, or if a data line contains
                             a token that can not be converted to float
        """
        lines = self._part2_string.split('\n')
        # the first line is a comment
        lines.pop(0)

        # comment with column names; a missing header is reported the same
        # way as a wrong one instead of leaking an IndexError
        if not lines:
            raise QFepOutputError("Part2 has a wrong header, did the qfep5 "
                                  "binary change?")
        header = lines.pop(0).strip()
        if header != self._PART2_HEADER:
            raise QFepOutputError("Part2 has a wrong header, did the qfep5 "
                                  "binary change?")
        for line in lines:
            # drop everything after the first '#' or '!'
            # (raw string: '\!' is not a valid escape in a normal literal)
            line = re.split(r"#|\!", line)[0].strip()
            if not line:
                continue
            try:
                row = [float(x) for x in line.split()]
            except ValueError:
                raise QFepOutputError("Part2 contains a value that is not "
                                      "a number: '{}'".format(line))
            self.data.add_row(row)
Ejemplo n.º 7
0
    def get_temps(self, stride=1):
        """
        Combine the temperature rows of all entries in self.analysed.

        The column titles are copied from the first entry; the stride is
        applied to the rows of each entry separately.
        Args:
           stride (int, optional):  use only every Nth point, default=1
        Returns:
           temperatures (DataContainer)
        """

        # e.g. "Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent"
        titles = self.analysed[0].data_temp.column_titles
        combined = DataContainer(titles)

        for analysed in self.analysed:
            for entry in analysed.data_temp.get_rows()[::stride]:
                combined.add_row(entry)
        return combined
Ejemplo n.º 8
0
 def __init__(self, part0_string, num_evb_states, calc_index):
     """Create one DataContainer per EVB state and parse the Part0 text.

     Args:
         part0_string (string):  stored as ``self._part0_string``
         num_evb_states (int):  number of containers in ``self.data_state``
         calc_index:  passed on to ``self._parse()``

     Raises:
         QFepOutputError: if any container has no rows after parsing
     """
     self._part0_string = part0_string
     self._num_evb_states = num_evb_states
     self._calc_index = calc_index

     containers = []
     for _ in range(num_evb_states):
         containers.append(DataContainer(self._COLUMN_TITLES))
     self.data_state = containers

     self._parse(calc_index)

     # every state has to end up with at least one row
     if any(not state.get_rows() for state in self.data_state):
         raise QFepOutputError("Part0 is empty (no rows).")
Ejemplo n.º 9
0
    def get_offdiags(self, stride=1):
        """Combine the off-diagonal rows of all entries in self.analysed.

        The column titles are copied from the first entry; the stride is
        applied to the rows of each entry separately.

        Args:
           stride (int, optional):  use only every Nth point, default=1

        Returns:
           distances (DataContainer):  a single container holding the
                                       selected rows of every entry
        """

        titles = self.analysed[0].data_offdiags.column_titles
        combined = DataContainer(titles)

        for analysed in self.analysed:
            for entry in analysed.data_offdiags.get_rows()[::stride]:
                combined.add_row(entry)
        return combined
Ejemplo n.º 10
0
    def __init__(self, qcalc_exec, calcdirs, pdb_file, en_list_fn,
                 lambdas_A, lambdas_B, resid_first, resid_last,
                 scale_ionized, nthreads, qmask=None):
        """Store the settings and create the (empty) result containers.

        Args:
            qcalc_exec:  stored as ``self._qcalc_exec``
            calcdirs:  iterable of directories, stored as relative paths
            pdb_file:  parsed with ``QStruct(pdb_file, "pdb")``
            en_list_fn:  stored as ``self._en_list_fn``
            lambdas_A, lambdas_B:  sequences; their first elements appear
                                   in the column titles of ``gcs_stats``
            resid_first, resid_last:  stored as given
            scale_ionized:  stored as given, appears in the column titles
            nthreads:  stored as ``self._nthreads``
            qmask (optional):  stored as ``self._qmask``, default None

        Raises:
            QGroupContribError: if QStruct raises QStructError
        """

        self._en_list_fn = en_list_fn
        self._qcalc_exec = qcalc_exec
        try:
            self._pdb_qstruct = QStruct(pdb_file, "pdb")
        except QStructError as error_msg:
            message = "Can't parse PDB file '{}': {}".format(pdb_file,
                                                             error_msg)
            raise QGroupContribError(message)

        self._calcdirs = [os.path.relpath(cd) for cd in calcdirs]
        self._nthreads = nthreads
        self._lambdas_A = lambdas_A
        self._lambdas_B = lambdas_B
        self._resid_first = resid_first
        self._resid_last = resid_last
        self._scale_ionized = scale_ionized
        self._qmask = qmask

        # result holders, all empty for now
        self._qcalc_io = ODict()
        self.gcs = ODict()
        self.failed = ODict()
        self.qcalc_version = None

        self.kill_event = threading.Event()

        # the mean/stdev titles share a common prefix
        lambda1_st1 = lambdas_A[0]
        lambda2_st1 = lambdas_B[0]
        vdw_title = "VdW(l={:5.4f}->l={:5.4f})".format(lambda1_st1,
                                                       lambda2_st1)
        el_title = "El(l={:5.4f}->l={:5.4f})_(scale={})".format(
            lambda1_st1, lambda2_st1, self._scale_ionized)
        colnames = ["Residue id", "Residue name", "N"]
        for title in (vdw_title, el_title):
            colnames.append(title + "_mean")
            colnames.append(title + "_stdev")
        self.gcs_stats = DataContainer(colnames)
Ejemplo n.º 11
0
    def get_energies(self, e_type, stride=1):
        """Combine the energy rows of all entries in self.analysed.

        The column titles are copied from the first entry; the stride is
        applied to the rows of each entry separately.

        Args:
           e_type (string):  keys in QDynOutput.map_en_section dictionary
           stride (int, optional):  use only every Nth point, default=1

        Returns:
           energies (DataContainer)
        """

        titles = self.analysed[0].map_en_section[e_type].column_titles
        combined = DataContainer(titles)

        for analysed in self.analysed:
            section = analysed.map_en_section[e_type]
            for entry in section.get_rows()[::stride]:
                combined.add_row(entry)
        return combined
Ejemplo n.º 12
0
    def get_offdiags(self, percent_skip=0, stride=1):
        """
        Combine the off-diagonal rows of all entries in self.analysed.

        The column titles are copied from the first entry; skipping and
        stride are applied to the rows of each entry separately.
        Args:
           percent_skip (int, optional):  percent of datapoints in each
                                          logfile to skip, default=0
           stride (int, optional):  use only every Nth point, default=1
        Returns:
           distances (DataContainer):  a single container holding the
                                       selected rows of every entry
        """

        first = self.analysed[0]
        titles = list(first.data_offdiags.get_column_titles())
        combined = DataContainer(titles)

        for analysed in self.analysed:
            entries = analysed.data_offdiags.get_rows()
            # number of leading rows to drop, rounded to the nearest integer
            n_skip = int(round(len(entries) * percent_skip / 100.0))
            for entry in entries[n_skip::stride]:
                combined.add_row(entry)
        return combined
Ejemplo n.º 13
0
    def get_temps(self, percent_skip=0, stride=1):
        """
        Combine the temperature rows of all entries in self.analysed.

        The column titles are copied from the first entry; skipping and
        stride are applied to the rows of each entry separately.
        Args:
           percent_skip (int, optional):  percent of datapoints in each
                                          logfile to skip, default=0
           stride (int, optional):  use only every Nth point, default=1
        Returns:
           temperatures (DataContainer)
        """

        # e.g. "Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent"
        first = self.analysed[0]
        titles = list(first.data_temp.get_column_titles())
        combined = DataContainer(titles)

        for analysed in self.analysed:
            entries = analysed.data_temp.get_rows()
            # number of leading rows to drop, rounded to the nearest integer
            n_skip = int(round(len(entries) * percent_skip / 100.0))
            for entry in entries[n_skip::stride]:
                combined.add_row(entry)
        return combined
Ejemplo n.º 14
0
    def get_energies(self, e_type, percent_skip=0, stride=1):
        """
        Combine the energy rows of all entries in self.analysed.

        The column titles are copied from the first entry; skipping and
        stride are applied to the rows of each entry separately.
        Args:
           e_type (string):  keys in QAnalyseDyn.map_en_section dictionary
           percent_skip (int, optional):  percent of datapoints in each
                                          logfile to skip, default=0
           stride (int, optional):  use only every Nth point, default=1
        Returns:
           energies (DataContainer)
        """

        first_section = self.analysed[0].map_en_section[e_type]
        titles = list(first_section.get_column_titles())
        combined = DataContainer(titles)

        for analysed in self.analysed:
            entries = analysed.map_en_section[e_type].get_rows()
            # number of leading rows to drop, rounded to the nearest integer
            n_skip = int(round(len(entries) * percent_skip / 100.0))
            for entry in entries[n_skip::stride]:
                combined.add_row(entry)
        return combined
Ejemplo n.º 15
0
    def _parse_dyn(self):
        """Parses the dynamics part of the Qdyn output (called by init)

        Extracts all the temperatures, energies, Q energies and off-diagonals.

        Reading starts at offset self._header_length. Rows are added to
        self.data_temp and to the containers found in self.map_en_section
        and self.map_qen_section; self.data_offdiags is created at the end.
        Energy-section lines are ignored for as long as the step counter
        is 0.

        Raises:
            QDynOutputError: if "Initialising dynamics" is found in the
                             part of the file that is read here
        """

        # tmp temperature vars
        t_free, t_tot = None, None
        temps_q6 = {"Total": [], "Free": [], "Solute": [],
                    "Solvent": [], "time": []}
        # tmp offdiagonal vars
        tmp_offdiags = ODict()
        for atom1, atom2 in self.header.offdiagonals:
            k = "{}_{}".format(atom1, atom2)
            tmp_offdiags[k] = []

        time = self.time_begin
        insection = False
        step = 0
        with gzopen(self._qdyn_output) as qdyn_output:
            qdyn_output.seek(self._header_length)
            for line in qdyn_output:
                lf = line.split()
                if not lf:
                    continue
                if "Initialising dynamics" in line:
                    # NOTE(review): two separate arguments are passed to the
                    # exception here (not one concatenated message) - confirm
                    # that this is intended
                    raise QDynOutputError("Found more than one qdyn_output...",
                                           "Please don't concatenate...")

                # Temperature
                # NOTE(review): compared against the string "6"; presumably
                # qdyn_version is a string, which makes this a lexicographic
                # comparison - confirm
                if self.header.qdyn_version > "6":
                    if "temperature at step" in line:
                        # fix for large step numbers
                        lf = line.replace("step", "step ")
                        lf = lf.replace("System", "").split()
                        t_type, t, step = lf[0], float(lf[6]), int(lf[4])
                        temps_q6[t_type].append(t)
                        if t_type == "Total":
                            time = step * self.header.stepsize \
                                 * self._timeconv  + self.time_begin
                            temps_q6["time"].append(time)
                else:
                    # second line with temps (pre Q6)
                    if t_free is not None: # second line with temps
                        # a missing or non-numeric field is read as 0
                        try:
                            tf_solute = float(lf[1])
                        except (IndexError, ValueError): # gas phase
                            tf_solute = 0
                        try:
                            tf_solvent = float(lf[3])
                        except (IndexError, ValueError): # gas phase
                            tf_solvent = 0
                        self.data_temp.add_row((time, t_tot, t_free,
                                                tf_solute, tf_solvent))
                        t_free, t_tot = None, None
                    # first line with temps (pre Q6)
                    elif "Temperature at step" in line:
                        # fix for large step numbers
                        lf = line.replace("step", "step ").split()
                        step = int(lf[3].strip(":"))
                        time = step * self.header.stepsize \
                             * self._timeconv  + self.time_begin
                        t_tot, t_free = float(lf[5]), float(lf[7])

                if "Energy summary at step" in line or \
                        "Q-atom energies at step" in line:
                    insection = True
                    step = int(lf[5])
                    time = step * self.header.stepsize \
                         * self._timeconv  + self.time_begin

                elif "FINAL  Energy summary" in line or \
                        "FINAL Q-atom energies" in line:
                    insection = True
                    time = self.header.nsteps * self.header.stepsize \
                         * self._timeconv  + self.time_begin

                elif "===================================================="\
                     "======================" in line:
                    insection = False

                # skip the 0th step
                if step == 0:
                    continue
                elif insection:
                    key = lf[0]
                    if key in self.map_en_section:
                        row = [time,] + [float(x) for x in lf[1:]]
                        self.map_en_section[key].add_row(row)
                    elif key in self.map_qen_section:
                        # state index in the output is 1-based
                        evb_index = int(lf[1]) - 1
                        row = [time,] + [float(x) for x in lf[2:]]
                        self.map_qen_section[key][evb_index].add_row(row)
                    elif "dist. between" in line:
                        atom1, atom2, dist = lf[8], lf[9], float(lf[11])
                        k = "{}_{}".format(atom1, atom2)
                        tmp_offdiags[k].append([time, dist])

        # join temperatures to one DataContainer (Q6+)
        for i, time in enumerate(temps_q6["time"]):
            try:
                t_solv = temps_q6["Solvent"][i]
            except IndexError:
                # gas phase
                t_solv = 0

            self.data_temp.add_row((time,
                                    temps_q6["Total"][i],
                                    temps_q6["Free"][i],
                                    temps_q6["Solute"][i],
                                    t_solv))

        # join Offdiagonal distances to single DataContainer
        offd_keys = list(tmp_offdiags.keys())
        cts = ["Time",] + offd_keys
        self.data_offdiags = DataContainer(cts)
        # without off-diagonals there is nothing to join; indexing the
        # first series would raise IndexError in that case
        if offd_keys:
            # the time values are taken from the first off-diagonal series
            for i, (time, _) in enumerate(tmp_offdiags[offd_keys[0]]):
                row = [time,] + [tmp_offdiags[k][i][1] for k in offd_keys]
                self.data_offdiags.add_row(row)
Ejemplo n.º 16
0
    def calcall(self):
        """Run the GC calcs, update .gcs, .failed and .gcs_stats.

        One _QGroupContribThread is started per directory in
        self._calcdirs. When self.kill_event is set while waiting, the
        process of the thread currently waited for is terminated
        (best effort) and the method returns without producing results.

        The outputs of the successful directories are then parsed, the
        LRA and reorganization contributions are calculated per residue
        and stored in self.gcs, and the mean/st.dev over all directories
        is written to self.gcs_stats. Directories that could not be
        calculated or parsed end up in self.failed.
        """
        semaphore = threading.BoundedSemaphore(self._nthreads)

        self._qcalc_io.clear()
        self.gcs.clear()
        self.gcs_stats.delete_rows()
        self.failed.clear()
        threads = []
        for calcdir in self._calcdirs:
            threads.append(_QGroupContribThread(self, semaphore, calcdir))
            threads[-1].start()

        for t in threads:
            # is_alive() exists on both Python 2.6+ and 3;
            # isAlive() was removed in Python 3.9
            while t.is_alive():
                # wake up every second to check for a kill request
                t.join(1.0)
                if self.kill_event.is_set():
                    try:
                        t.qcalc.process.terminate()
                    except Exception:
                        # best effort, the process may not exist (anymore)
                        pass
                    return

            if t.error:
                self.failed[t.calcdir] = t.error
            else:
                self._qcalc_io[t.calcdir] = (t.qinps, t.qouts)

        # parse the output for results and
        # calculate LRAs for each dir
        for _dir, (_, qouts) in self._qcalc_io.items():
            gcs = []
            failed_flag = False
            for qout in qouts:
                try:
                    qco = QCalcOutput(qout)
                    res = qco.results["gc"]
                    if not self.qcalc_version:
                        self.qcalc_version = qco.qcalc_version
                except (QCalcError, KeyError) as error_msg:
                    self.failed[_dir] = error_msg
                    failed_flag = True
                    break
                gc = {}
                for row in res.get_rows():
                    resid, vdw, el = int(row[0]), float(row[1]), float(row[2])
                    gc[resid] = {"vdw": vdw, "el": el}
                gcs.append(gc)

            if failed_flag:
                continue

            resids = sorted(gcs[0].keys())
            # residue ids are 1-based, the residue list is 0-based
            resnames = [
                self._pdb_qstruct.residues[ri - 1].name for ri in resids
            ]

            # do the LRA thingy
            # LRA = 0.5*(<E2-E1>_conf1+<E2-E1>_conf2)
            # REORG = <E2-E1>_conf1 - LRA
            # (this indexes the first four parsed outputs of the directory)
            e2e1_st1_vdw = [
                gcs[1][key]["vdw"] - gcs[0][key]["vdw"] for key in resids
            ]
            e2e1_st1_el = [
                gcs[1][key]["el"] - gcs[0][key]["el"] for key in resids
            ]
            e2e1_st2_vdw = [
                gcs[3][key]["vdw"] - gcs[2][key]["vdw"] for key in resids
            ]
            e2e1_st2_el = [
                gcs[3][key]["el"] - gcs[2][key]["el"] for key in resids
            ]

            # super efficient stuff here
            vdw_lra = [
                0.5 * (a + b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)
            ]
            el_lra = [0.5 * (a + b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)]
            vdw_reorg = [
                0.5 * (a - b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)
            ]
            el_reorg = [
                0.5 * (a - b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)
            ]

            # scale the ionized residues
            # (skipped when the scaling factor is 1 within 1e-7)
            if abs(self._scale_ionized - 1.0) > 1e-7:
                for i, resname in enumerate(resnames):
                    if resname in ("ARG", "LYS", "HIP", "ASP", "GLU"):
                        e2e1_st1_el[i] = e2e1_st1_el[i] / self._scale_ionized
                        e2e1_st2_el[i] = e2e1_st2_el[i] / self._scale_ionized
                        el_lra[i] = el_lra[i] / self._scale_ionized
                        el_reorg[i] = el_reorg[i] / self._scale_ionized

            # write the DataContainer
            lambda1_st1 = self._lambdas_A[0]
            lambda2_st1 = self._lambdas_B[0]
            gc_lra = DataContainer([
                "Residue_id", "Residue name",
                "<E2-E1>1_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "<E2-E1>1_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
                "<E2-E1>2_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "<E2-E1>2_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
                "LRA_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "LRA_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
                "REORG_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "REORG_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1, self._scale_ionized)
            ])

            for row in zip(resids, resnames, e2e1_st1_vdw, e2e1_st1_el,
                           e2e1_st2_vdw, e2e1_st2_el, vdw_lra, el_lra,
                           vdw_reorg, el_reorg):
                gc_lra.add_row(row)

            self.gcs[_dir] = gc_lra

        # get GC stats over all directories
        self.gcs_stats.delete_rows()
        # "resid.resname" -> one list of values per contribution column
        gcs = {}
        for gc in self.gcs.values():
            for row in gc.get_rows():
                resid, resname = row[0:2]
                res_key = "{}.{}".format(resid, resname)
                values = [[
                    val,
                ] for val in row[2:]]
                if res_key not in gcs:
                    gcs[res_key] = values
                else:
                    for i, val in enumerate(gcs[res_key]):
                        val.extend(values[i])

        # iterate through each residue and calculate
        # means and stdevs
        # (sort by residue index)
        for res_key in sorted(gcs.keys(), key=lambda x: int(x.split(".")[0])):
            rc = gcs[res_key]
            resid, resname = res_key.split(".")
            # get mean and stdev
            rc_stats = [
                int(resid),
                resname,
                len(rc[0]),
                np.mean(rc[0]),
                np.std(rc[0]),  # <E2-E1>1 vdw
                np.mean(rc[1]),
                np.std(rc[1]),  # <E2-E1>1 el
                np.mean(rc[2]),
                np.std(rc[2]),  # <E2-E1>2 vdw
                np.mean(rc[3]),
                np.std(rc[3]),  # <E2-E1>2 el
                np.mean(rc[4]),
                np.std(rc[4]),  # LRA vdw
                np.mean(rc[5]),
                np.std(rc[5]),  # LRA el
                np.mean(rc[6]),
                np.std(rc[6]),  # REORG vdw
                np.mean(rc[7]),
                np.std(rc[7])
            ]  # REORG el

            # NOTE(review): this row holds 19 values - confirm that the
            # column titles of self.gcs_stats match
            self.gcs_stats.add_row(rc_stats)
Ejemplo n.º 17
0
    def __init__(self, qdyn_output,
                 time_unit="ps", step_size=None, start_time=0):
        """Parse the header and the dynamics part of a Qdyn output.

        Args:
            qdyn_output:  stored as ``self._qdyn_output``
            time_unit (string):  "fs", "ps" or "ns" (case-insensitive),
                                 default is "ps"
            step_size:  stored as ``self._stepsize_user``, default None
            start_time:  stored as ``self.time_begin``, default 0

        Raises:
            QDynOutputError: if time_unit is not one of the three units
        """

        self._qdyn_output = qdyn_output

        # conversion factors for the supported time units
        unit = time_unit.lower()
        conversions = {"fs": 1.0, "ps": 1e-3, "ns": 1e-6}
        if unit not in conversions:
            raise QDynOutputError("Timeunit has to be either 'fs',"
                                  "'ps' or 'ns'")
        self._timeconv = conversions[unit]
        self._stepsize_user = step_size

        # parse the header
        self.time_begin = start_time
        self.time_unit = unit
        self._parse_header()

        ###  Datacontainer variables for storing all the data
        # temperature
        temp_titles = ["Time", "T_tot", "T_free",
                       "T_free_solute", "T_free_solvent"]
        self.data_temp = DataContainer(temp_titles)

        # energies (title lists are shared between containers on purpose,
        # exactly one list object per layout)
        bonded_titles = ["Time", "El", "VdW", "Bond",
                         "Angle", "Torsion", "Improper"]
        restraint_titles = ["Time", "Total", "Fix", "Solvent_rad",
                            "Solvent_pol", "Shell", "Solute"]
        sum_titles = ["Time", "Total", "Potential", "Kinetic"]
        self.data_E_solute = DataContainer(bonded_titles)
        self.data_E_solvent = DataContainer(bonded_titles)
        self.data_E_solute_solvent = DataContainer(["Time", "El", "VdW"])
        self.data_E_LRF = DataContainer(["Time", "El"])
        self.data_E_Q_atom = DataContainer(bonded_titles)
        self.data_E_restraints = DataContainer(restraint_titles)
        self.data_E_SUM = DataContainer(sum_titles)

        # Q energies, one container per state in each list
        q_nonbonded = ("Time", "Lambda", "El", "VdW")
        q_all = ("Time", "Lambda", "El", "VdW", "Bond",
                 "Angle", "Torsion", "Improper")
        q_sum = ("Time", "Lambda", "Total", "Restraint")

        self.data_EQ_Q = []
        self.data_EQ_prot = []
        self.data_EQ_wat = []
        self.data_EQ_surr = []
        self.data_EQ_any = []
        self.data_EQ_SUM = []
        for _ in range(self.header.nstates):
            self.data_EQ_Q.append(DataContainer(q_nonbonded))
            self.data_EQ_prot.append(DataContainer(q_nonbonded))
            self.data_EQ_wat.append(DataContainer(q_nonbonded))
            self.data_EQ_surr.append(DataContainer(q_nonbonded))
            self.data_EQ_any.append(DataContainer(q_all))
            self.data_EQ_SUM.append(DataContainer(q_sum))

        # mapping of energy types (label in the output) with containers
        en_pairs = [
            ("solute", self.data_E_solute),
            ("solvent", self.data_E_solvent),
            ("solute-solvent", self.data_E_solute_solvent),
            ("LRF", self.data_E_LRF),
            ("Q-atom", self.data_E_Q_atom),
            ("restraints", self.data_E_restraints),
            ("SUM", self.data_E_SUM),
        ]
        self.map_en_section = ODict(en_pairs)

        qen_pairs = [
            ("Q-Q", self.data_EQ_Q),
            ("Q-prot", self.data_EQ_prot),
            ("Q-wat", self.data_EQ_wat),
            ("Q-surr.", self.data_EQ_surr),
            ("Q-any", self.data_EQ_any),
            ("Q-SUM", self.data_EQ_SUM),
        ]
        self.map_qen_section = ODict(qen_pairs)

        # parse the rest
        self._parse_dyn()

        # end time = number of steps * stepsize (converted) + start time
        elapsed = self.header.nsteps * self.header.stepsize * self._timeconv
        self.time_end = elapsed + self.time_begin
Ejemplo n.º 18
0
class QDynOutput(object):
    """Class for parsing Qdyn output and storing the data.

    Supports Qdyn versions 5.10 or higher.
    Typically used indirectly by wrapper QAnalyseDyns.

    The whole output is parsed on construction. The parsed data is
    exposed through these attributes:
        header: _QDynHeader object built from the header part
        data_temp: DataContainer with temperatures
        data_E_*: DataContainers with the energy summary sections
        data_EQ_*: lists of DataContainers (one per state) with Q energies
        data_offdiags: DataContainer with off-diagonal distances
        time_begin, time_end, time_unit: simulation time span and its unit

    Args:
        qdyn_output (string): Qdyn output filename
        time_unit (string): fs,ps,ns (optional, default is ps)
        step_size (float): use in case the output reads 0.000
        start_time (float): redefine the start time in given units
                            in case of continuation simulation (default is 0)

    Raises:
        QDynOutputError: on an unknown time unit, an unreadable file or
                         a file that contains more than one Qdyn output

    Examples:
        # Load a qdyn output
        >>> qdo = QDynOutput("qdyn.log")
        # print out the total temperature
        >>> print(qdo.data_temp.get_rows(["Time", "T_tot"]))
        # print out the Q-Q electrostatic energy of the first state
        >>> print(qdo.data_EQ_Q[0].get_rows(["Time", "El"]))
    """

    def __init__(self, qdyn_output,
                 time_unit="ps", step_size=None, start_time=0):

        self._qdyn_output = qdyn_output

        # conversion factors applied to (step * stepsize)
        # NOTE(review): presumably the header stepsize is in fs - confirm
        time_factors = {"fs": 1.0, "ps": 1e-3, "ns": 1e-6}
        unit = time_unit.lower()
        if unit not in time_factors:
            raise QDynOutputError("Timeunit has to be either 'fs',"
                                  "'ps' or 'ns'")
        self._timeconv = time_factors[unit]
        self._stepsize_user = step_size

        # parse the header
        self.time_begin = start_time
        self.time_unit = unit
        self._parse_header()

        ###  Datacontainer variables for storing all the data
        # temperature
        self.data_temp = DataContainer(["Time", "T_tot", "T_free",
                                        "T_free_solute", "T_free_solvent"])
        # energies
        columns1 = ["Time", "El", "VdW", "Bond",
                    "Angle", "Torsion", "Improper"]
        columns2 = ["Time", "Total", "Fix", "Solvent_rad",
                    "Solvent_pol", "Shell", "Solute"]
        columns3 = ["Time", "Total", "Potential", "Kinetic"]
        self.data_E_solute = DataContainer(columns1)
        self.data_E_solvent = DataContainer(columns1)
        self.data_E_solute_solvent = DataContainer(["Time", "El", "VdW"])
        self.data_E_LRF = DataContainer(["Time", "El"])
        self.data_E_Q_atom = DataContainer(columns1)
        self.data_E_restraints = DataContainer(columns2)
        self.data_E_SUM = DataContainer(columns3)
        # Q energies
        q_columns1 = ("Time", "Lambda", "El", "VdW")
        q_columns2 = ("Time", "Lambda", "El", "VdW", "Bond",
                      "Angle", "Torsion", "Improper")
        q_columns3 = ("Time", "Lambda", "Total", "Restraint")

        # one container per state for every Q energy type
        self.data_EQ_Q, self.data_EQ_prot = [], []
        self.data_EQ_wat, self.data_EQ_surr = [], []
        self.data_EQ_any, self.data_EQ_SUM = [], []
        for _ in range(self.header.nstates):
            self.data_EQ_Q.append(DataContainer(q_columns1))
            self.data_EQ_prot.append(DataContainer(q_columns1))
            self.data_EQ_wat.append(DataContainer(q_columns1))
            self.data_EQ_surr.append(DataContainer(q_columns1))
            self.data_EQ_any.append(DataContainer(q_columns2))
            self.data_EQ_SUM.append(DataContainer(q_columns3))

        # mapping of energy types (label in the output) with containers
        self.map_en_section = ODict([("solute", self.data_E_solute),
                                     ("solvent", self.data_E_solvent),
                                     ("solute-solvent",
                                         self.data_E_solute_solvent),
                                     ("LRF", self.data_E_LRF),
                                     ("Q-atom", self.data_E_Q_atom),
                                     ("restraints", self.data_E_restraints),
                                     ("SUM", self.data_E_SUM)])

        self.map_qen_section = ODict([("Q-Q", self.data_EQ_Q),
                                      ("Q-prot", self.data_EQ_prot),
                                      ("Q-wat", self.data_EQ_wat),
                                      ("Q-surr.", self.data_EQ_surr),
                                      ("Q-any", self.data_EQ_any),
                                      ("Q-SUM", self.data_EQ_SUM)])

        # parse the rest
        self._parse_dyn()
        self.time_end = self._step_to_time(self.header.nsteps)

    def _step_to_time(self, step):
        """Convert a step number to time in the chosen unit."""
        return step * self.header.stepsize \
             * self._timeconv + self.time_begin

    def _parse_header(self):
        """Parses the header of the Qdyn output (called by init)

        Reads the file up to and including the line that contains
        'Initialising dynamics', builds self.header from that text and
        remembers its length so that _parse_dyn() can skip it.

        Raises QDynOutputError if the file cannot be read.
        """
        header_lines = []
        try:
            with gzopen(self._qdyn_output) as qdo:
                for line in qdo:
                    header_lines.append(line)
                    if "Initialising dynamics" in line:
                        break
        except IOError as e:
            raise QDynOutputError("Could not read the Qdyn output: {}"
                                  "".format(e))

        header_string = "".join(header_lines)
        self.header = _QDynHeader(header_string, step_size=self._stepsize_user)
        self._header_length = len(header_string)

    def _parse_dyn(self):
        """Parses the dynamics part of the Qdyn output (called by init)

        Extracts all the temperatures, energies, Q energies and off-diagonals.

        Raises QDynOutputError if a second 'Initialising dynamics' line is
        found after the header (concatenated outputs).
        """

        # tmp temperature vars
        t_free, t_tot = None, None
        temps_q6 = {"Total": [], "Free": [], "Solute": [],
                    "Solvent": [], "time": []}
        # tmp offdiagonal vars
        tmp_offdiags = ODict()
        for atom1, atom2 in self.header.offdiagonals:
            tmp_offdiags["{}_{}".format(atom1, atom2)] = []

        time = self.time_begin
        insection = False
        step = 0
        with gzopen(self._qdyn_output) as qdyn_output:
            qdyn_output.seek(self._header_length)
            for line in qdyn_output:
                lf = line.split()
                if not lf:
                    continue
                if "Initialising dynamics" in line:
                    raise QDynOutputError("Found more than one qdyn_output... "
                                          "Please don't concatenate...")

                # Temperature
                # NOTE(review): this is a string comparison; it assumes
                # qdyn_version is a string such as "5.10" or "6.0" - confirm
                if self.header.qdyn_version > "6":
                    if "temperature at step" in line:
                        # fix for large step numbers
                        lf = line.replace("step", "step ")
                        lf = lf.replace("System", "").split()
                        t_type, t, step = lf[0], float(lf[6]), int(lf[4])
                        temps_q6[t_type].append(t)
                        if t_type == "Total":
                            time = self._step_to_time(step)
                            temps_q6["time"].append(time)
                else:
                    # second line with temps (pre Q6)
                    if t_free is not None:
                        try:
                            tf_solute = float(lf[1])
                        except (IndexError, ValueError):  # gas phase
                            tf_solute = 0
                        try:
                            tf_solvent = float(lf[3])
                        except (IndexError, ValueError):  # gas phase
                            tf_solvent = 0
                        self.data_temp.add_row((time, t_tot, t_free,
                                                tf_solute, tf_solvent))
                        t_free, t_tot = None, None
                    # first line with temps (pre Q6)
                    elif "Temperature at step" in line:
                        # fix for large step numbers
                        lf = line.replace("step", "step ").split()
                        step = int(lf[3].strip(":"))
                        time = self._step_to_time(step)
                        t_tot, t_free = float(lf[5]), float(lf[7])

                if "Energy summary at step" in line or \
                        "Q-atom energies at step" in line:
                    insection = True
                    step = int(lf[5])
                    time = self._step_to_time(step)

                elif "FINAL  Energy summary" in line or \
                        "FINAL Q-atom energies" in line:
                    insection = True
                    time = self._step_to_time(self.header.nsteps)

                elif "===================================================="\
                     "======================" in line:
                    insection = False

                # skip the 0th step
                if step == 0:
                    continue
                elif insection:
                    key = lf[0]
                    if key in self.map_en_section:
                        row = [time] + [float(x) for x in lf[1:]]
                        self.map_en_section[key].add_row(row)
                    elif key in self.map_qen_section:
                        # state numbers in the output are 1-based
                        evb_index = int(lf[1]) - 1
                        row = [time] + [float(x) for x in lf[2:]]
                        self.map_qen_section[key][evb_index].add_row(row)
                    elif "dist. between" in line:
                        atom1, atom2, dist = lf[8], lf[9], float(lf[11])
                        k = "{}_{}".format(atom1, atom2)
                        tmp_offdiags[k].append([time, dist])

        # join temperatures to one DataContainer (Q6+)
        for i, time in enumerate(temps_q6["time"]):
            try:
                t_solv = temps_q6["Solvent"][i]
            except IndexError:
                # gas phase
                t_solv = 0

            self.data_temp.add_row((time,
                                    temps_q6["Total"][i],
                                    temps_q6["Free"][i],
                                    temps_q6["Solute"][i],
                                    t_solv))

        # join Offdiagonal distances to single DataContainer
        offd_keys = list(tmp_offdiags.keys())
        self.data_offdiags = DataContainer(["Time"] + offd_keys)
        # without off-diagonals there is nothing to join; the container
        # stays empty instead of failing on a missing first element
        if offd_keys:
            first_offdiag = tmp_offdiags[offd_keys[0]]
            for i, (time, _) in enumerate(first_offdiag):
                row = [time] + [tmp_offdiags[k][i][1] for k in offd_keys]
                self.data_offdiags.add_row(row)
Ejemplo n.º 19
0
class QGroupContrib(object):
    """Class for calculating LRA group contributions from EVB trajectories.

    Args:
        qcalc_exec (string): path of qcalc executable
        calcdirs (list of strings): list of directories
        pdb_file (string): PDB created with qprep
        en_list_fn (string): energy-files-list filename
        lambdas_A (tuple of floats): lambdas of state A (1.0, 0.0)
        lambdas_B (tuple of floats): lambdas of state B (0.5, 0.5)
        resid_first (int): index of first residue used for calcs
        resid_last (int): index of last residue used for calcs
        scale_ionized (float): scale down ionized residues (ARG, LYS, \
                               HIP, GLU, ASP) by this factor \
                               (see doi:10.1021/jp962478o)
        nthreads (int): number of threads
        qmask (list of ints): list of atom indexes to be used as the\
                              Q mask for the GC calculations. By default,\
                              this is obtained from the FEP file.

    """
    def __init__(self,
                 qcalc_exec,
                 calcdirs,
                 pdb_file,
                 en_list_fn,
                 lambdas_A,
                 lambdas_B,
                 resid_first,
                 resid_last,
                 scale_ionized,
                 nthreads,
                 qmask=None):
        """Store the settings, parse the PDB and set up empty results.

        Raises QGroupContribError if the PDB file cannot be parsed.
        """
        self._en_list_fn = en_list_fn
        self._qcalc_exec = qcalc_exec
        try:
            self._pdb_qstruct = QStruct(pdb_file, "pdb")
        except QStructError as error_msg:
            raise QGroupContribError("Can't parse PDB file '{}': {}"
                                     "".format(pdb_file, error_msg))

        self._calcdirs = [os.path.relpath(cd) for cd in calcdirs]
        self._nthreads = nthreads
        self._lambdas_A = lambdas_A
        self._lambdas_B = lambdas_B
        self._resid_first = resid_first
        self._resid_last = resid_last
        self._scale_ionized = scale_ionized
        self._qmask = qmask

        # results, filled in by calcall()
        self._qcalc_io = ODict()
        self.gcs = ODict()
        self.failed = ODict()
        self.qcalc_version = None

        self.kill_event = threading.Event()

        # every quantity is labelled with the state 1 lambdas of A and B,
        # the electrostatic LRA/REORG terms also with the scaling factor
        lambda_tag = "(l={:5.4f}->l={:5.4f})".format(lambdas_A[0],
                                                     lambdas_B[0])
        iscale_tag = "_(iscale={})".format(self._scale_ionized)
        quantities = (
            "<E2-E1>1_VdW" + lambda_tag,
            "<E2-E1>1_El" + lambda_tag,
            "<E2-E1>2_VdW" + lambda_tag,
            "<E2-E1>2_El" + lambda_tag,
            "LRA_VdW" + lambda_tag,
            "LRA_El" + lambda_tag + iscale_tag,
            "REORG_VdW" + lambda_tag,
            "REORG_El" + lambda_tag + iscale_tag,
        )

        # a mean and a stdev column for each quantity
        colnames = ["Residue id", "Residue name", "N"]
        for quantity in quantities:
            colnames.append(quantity + "_mean")
            colnames.append(quantity + "_stdev")
        self.gcs_stats = DataContainer(colnames)

    def calcall(self):
        """Run the GC calcs, update .gcs, .failed and .gcs_stats.
        """
        semaphore = threading.BoundedSemaphore(self._nthreads)

        self._qcalc_io.clear()
        self.gcs.clear()
        self.gcs_stats.delete_rows()
        self.failed.clear()
        threads = []
        for calcdir in self._calcdirs:
            threads.append(_QGroupContribThread(self, semaphore, calcdir))
            threads[-1].start()

        for t in threads:
            while t.isAlive():
                t.join(1.0)
                if self.kill_event.is_set():
                    try:
                        t.qcalc.process.terminate()
                    except Exception as e:
                        pass
                    return

            if t.error:
                self.failed[t.calcdir] = t.error
            else:
                self._qcalc_io[t.calcdir] = (t.qinps, t.qouts)

        # parse the output for results and
        # calculate LRAs for each dir
        for _dir, (_, qouts) in self._qcalc_io.iteritems():
            gcs = []
            failed_flag = False
            for qout in qouts:
                try:
                    qco = QCalcOutput(qout)
                    res = qco.results["gc"]
                    if not self.qcalc_version:
                        self.qcalc_version = qco.qcalc_version
                except (QCalcError, KeyError) as error_msg:
                    self.failed[_dir] = error_msg
                    failed_flag = True
                    break
                gc = {}
                for row in res.get_rows():
                    resid, vdw, el = int(row[0]), float(row[1]), float(row[2])
                    gc[resid] = {"vdw": vdw, "el": el}
                gcs.append(gc)

            if failed_flag:
                continue

            resids = sorted(gcs[0].keys())
            resnames = [
                self._pdb_qstruct.residues[ri - 1].name for ri in resids
            ]

            # do the LRA thingy
            # LRA = 0.5*(<E2-E1>_conf1+<E2-E1>_conf2)
            # REORG = <E2-E1>_conf1 - LRA
            e2e1_st1_vdw = [
                gcs[1][key]["vdw"] - gcs[0][key]["vdw"] for key in resids
            ]
            e2e1_st1_el = [
                gcs[1][key]["el"] - gcs[0][key]["el"] for key in resids
            ]
            e2e1_st2_vdw = [
                gcs[3][key]["vdw"] - gcs[2][key]["vdw"] for key in resids
            ]
            e2e1_st2_el = [
                gcs[3][key]["el"] - gcs[2][key]["el"] for key in resids
            ]

            # super efficient stuff here
            vdw_lra = [
                0.5 * (a + b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)
            ]
            el_lra = [0.5 * (a + b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)]
            vdw_reorg = [
                0.5 * (a - b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)
            ]
            el_reorg = [
                0.5 * (a - b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)
            ]

            # scale the ionized residues
            if abs(self._scale_ionized - 1.0) > 1e-7:
                for i, resname in enumerate(resnames):
                    if resname in ("ARG", "LYS", "HIP", "ASP", "GLU"):
                        e2e1_st1_el[i] = e2e1_st1_el[i] / self._scale_ionized
                        e2e1_st2_el[i] = e2e1_st2_el[i] / self._scale_ionized
                        el_lra[i] = el_lra[i] / self._scale_ionized
                        el_reorg[i] = el_reorg[i] / self._scale_ionized

            # write the DataContainer
            lambda1_st1 = self._lambdas_A[0]
            lambda2_st1 = self._lambdas_B[0]
            gc_lra = DataContainer([
                "Residue_id", "Residue name",
                "<E2-E1>1_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "<E2-E1>1_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
                "<E2-E1>2_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "<E2-E1>2_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1,
                          self._scale_ionized), "LRA_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "LRA_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1, self._scale_ionized),
                "REORG_VdW(l={:5.4f}->l={:5.4f})"
                "".format(lambda1_st1, lambda2_st1),
                "REORG_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                "".format(lambda1_st1, lambda2_st1, self._scale_ionized)
            ])

            for row in zip(resids, resnames, e2e1_st1_vdw, e2e1_st1_el,
                           e2e1_st2_vdw, e2e1_st2_el, vdw_lra, el_lra,
                           vdw_reorg, el_reorg):
                gc_lra.add_row(row)

            self.gcs[_dir] = gc_lra

        # get GC stats over all directories
        self.gcs_stats.delete_rows()
        gcs = {}
        for _, gc in self.gcs.iteritems():
            for row in gc.get_rows():
                resid, resname = row[0:2]
                res_key = "{}.{}".format(resid, resname)
                values = [[
                    val,
                ] for val in row[2:]]
                if not gcs.has_key(res_key):
                    gcs[res_key] = values
                else:
                    for i, val in enumerate(gcs[res_key]):
                        val.extend(values[i])

        # iterate through each residue and calculate
        # means and stdevs
        # (sort by residue index)
        for res_key in sorted(gcs.keys(), key=lambda x: int(x.split(".")[0])):
            rc = gcs[res_key]
            resid, resname = res_key.split(".")
            # get mean and stdev
            rc_stats = [
                int(resid),
                resname,
                len(rc[0]),
                np.mean(rc[0]),
                np.std(rc[0]),  # <E2-E1>1 vdw
                np.mean(rc[1]),
                np.std(rc[1]),  # <E2-E1>1 el
                np.mean(rc[2]),
                np.std(rc[2]),  # <E2-E1>2 vdw
                np.mean(rc[3]),
                np.std(rc[3]),  # <E2-E1>2 el
                np.mean(rc[4]),
                np.std(rc[4]),  # LRA vdw
                np.mean(rc[5]),
                np.std(rc[5]),  # LRA el
                np.mean(rc[6]),
                np.std(rc[6]),  # REORG vdw
                np.mean(rc[7]),
                np.std(rc[7])
            ]  # REORG el

            self.gcs_stats.add_row(rc_stats)

    def _calcsingle(self, calcdir, qcalc):
        """Run the four qcalc calculations for one directory.

        Finds the Qdyn inputs in 'calcdir' whose state 1 lambda matches
        lambdas_A[0] and lambdas_B[0] (and whose energy file is listed in
        the energy-files list), takes the topology, fep and trajectory
        filenames from them and runs qcalc for every combination of
        trajectory and lambdas, for example:
            fep_000_1.000.dcd, (1.00, 0.00)
            fep_000_1.000.dcd, (0.50, 0.50)
            fep_025_0.500.dcd, (1.00, 0.00)
            fep_025_0.500.dcd, (0.50, 0.50)

        Args:
            calcdir (string): directory with the Qdyn inputs and outputs
            qcalc: object whose run(input_string, workdir=...) executes
                   qcalc and returns its output string

        Returns:
            tuple of two lists of strings:
            ( [inp1, inp2, inp3, inp4], [out1, out2, out3, out4] )

        Raises:
            QGroupContribError: on missing or inconsistent files or inputs,
                                or when qcalc fails
        """

        # get the list of energy-files
        en_list_fn = os.path.join(calcdir, self._en_list_fn)
        try:
            with open(en_list_fn, 'r') as en_list_file:
                en_list_fn_str = en_list_file.read()
        except IOError:
            raise QGroupContribError("No energy-files list '{}'."
                                     "".format(self._en_list_fn))

        en_list = [enf for enf in en_list_fn_str.split("\n") if enf.strip()]

        if not en_list:
            raise QGroupContribError("No energy files in '{}'."
                                     "".format(self._en_list_fn))

        # parse all input files in calcdir for
        # a valid energy file and lambda values
        inp_fns = [inp for inp in os.listdir(calcdir) if inp.endswith(".inp")]
        lambda_inp_map = {}
        for inp in inp_fns:
            inp_file = os.path.join(calcdir, inp)
            try:
                with open(inp_file, "r") as inp_fobj:
                    qdi = QDynInput(input_string=inp_fobj.read())
            except (IOError, QDynInputError) as error_msg:
                logger.debug("Error reading Q input '{}': {}"
                             "".format(inp, error_msg))
                continue

            try:
                lambda_st1 = float(qdi.parameters["lambdas"].split()[0])
                en_file = qdi.parameters["files"]["energy"]
            except KeyError:
                logger.debug("Input '{}' missing lambda or energy file"
                             "".format(inp))
                continue

            if en_file not in en_list:
                continue

            # two inputs with the same state 1 lambda are ambiguous
            lambda_key = "{:.6f}".format(lambda_st1)
            if lambda_key in lambda_inp_map:
                inp2 = lambda_inp_map[lambda_key][0]
                raise QGroupContribError("Same lambda values in Qdyn "
                                         "inputs: '{}', '{}' ??"
                                         "".format(inp, inp2))
            lambda_inp_map[lambda_key] = (inp, qdi)

        # get inputs that match specified state1 lambda values
        inputs = []
        for lamb_st1 in (self._lambdas_A[0], self._lambdas_B[0]):
            lamb_key = "{:.6f}".format(lamb_st1)
            try:
                inputs.append(lambda_inp_map[lamb_key])
            except KeyError:
                raise QGroupContribError("QDyn input with lambda=='{}' "
                                         "(and energy file in '{}') not found."
                                         "".format(lamb_st1, en_list_fn))

        # get topology, fep and trajectory filenames from the inputs
        top_fn, fep_fn, dcd_fns = None, None, []
        for inp, qdi in inputs:
            try:
                tmp_top_fn = qdi.parameters["files"]["topology"]
            except KeyError:
                raise QGroupContribError("Topology not found in Qdyn "
                                         "input '{}'.".format(inp))
            if top_fn and top_fn != tmp_top_fn:
                raise QGroupContribError("Qdyn inputs with different "
                                         "topologies: '{}', '{}' ??"
                                         "".format(top_fn, tmp_top_fn))

            try:
                tmp_fep_fn = qdi.parameters["files"]["fep"]
            except KeyError:
                raise QGroupContribError("Fep file not found in Qdyn "
                                         "input '{}'.".format(inp))
            if fep_fn and fep_fn != tmp_fep_fn:
                raise QGroupContribError("Qdyn inputs with different "
                                         "fep files: '{}', '{}' ??"
                                         "".format(fep_fn, tmp_fep_fn))

            try:
                tmp_dcd_fn = qdi.parameters["files"]["trajectory"]
            except KeyError:
                raise QGroupContribError("Trajectory file not found in Qdyn "
                                         "input '{}'.".format(inp))

            top_fn = tmp_top_fn
            fep_fn = tmp_fep_fn
            dcd_fns.append(tmp_dcd_fn)

        # check if files are missing
        for fn in [top_fn, fep_fn] + dcd_fns:
            if not os.path.lexists(os.path.join(calcdir, fn)):
                raise QGroupContribError("Missing file: {}".format(fn))

        if not self._qmask:
            # parse fep for q atom numbers
            # (second column of the lines in the [atoms] section)
            section = ""
            q_atoms = []
            with open(os.path.join(calcdir, fep_fn), "r") as fep:
                for line in fep:
                    # strip comments
                    line = line.split("#")[0].split("!")[0].strip()
                    if line == "":
                        continue
                    elif line[0] == "[":
                        section = line
                    elif section == "[atoms]":
                        q_atoms.append(line.split()[1])
        else:
            q_atoms = self._qmask

        masks = ["{} {}".format(ai, ai) for ai in q_atoms]

        # make qcalc inputs for every combination of
        # configuration (dcd) and potential (lambda),
        # run them and return the inputs and outputs
        combs = (
            (dcd_fns[0], self._lambdas_A),  # E1_conf1
            (dcd_fns[0], self._lambdas_B),  # E2_conf1
            (dcd_fns[1], self._lambdas_A),  # E1_conf2
            (dcd_fns[1], self._lambdas_B))  # E2_conf2

        input_strings = []
        output_strings = []
        for dcdfile, lambdas in combs:
            qci = QCalcInput(top_fn, [dcdfile], fep_fn, lambdas)
            qci.add_residue_nb_mon(self._resid_first, self._resid_last, masks)
            qcalc_inp_str = qci.get_string()

            try:
                qcalc_out_str = qcalc.run(qcalc_inp_str, workdir=calcdir)
            except QCalcError as error_msg:
                raise QGroupContribError(error_msg)

            input_strings.append(qcalc_inp_str)
            output_strings.append(qcalc_out_str)

        return (input_strings, output_strings)

    @property
    def details(self):

        fails = "\n".join(["{}: {}".format(cd, e) \
                           for cd, e in self.failed.iteritems()])

        calcdirs = ", ".join(self._calcdirs)
        outstr = """
---------------------------------- GC details ---------------------------------
# Calculated with: Qtools ({version}), Qcalc ({qcalc_version})
# Qcalc path: {qcalc_exec}
# Work dir: {cwd}
# Date: {date}
# CMDline: {cmdline}

Directories:
{dirs}

Fails:
{fails}
-------------------------------------------------------------------------------
""".format(version=__version__,
           cwd=os.getcwd(),
           date=time.ctime(),
           cmdline=" ".join(sys.argv),
           qcalc_version=self.qcalc_version,
           fails=fails or "None",
           dirs=calcdirs,
           qcalc_exec=os.path.abspath(self._qcalc_exec))

        return outstr

    @property
    def plotdata(self):
        """Return GC data as a dictionary of PlotData objects.

        Example keys in returned dictionary:
            'gc_lra_el': PlotData of electrostatic LRA group contributions,
                         one subplot - means vs residue index

            'gc_lra_el_top': PlotData of top 20 electrostatic LRA GCs
                             one subplot - means vs "resid.resname"

            'gc_lra_vdw': PlotData of vdw LRA GCs,
                          one subplot - means vs residue indexes

            'gc_reorg_el': PlotData of el. 'REORG' group contributions,
                           one subplot - means vs residue index

            'gc_de1_el': PlotData of electrostatic <E1 - E2>_1,
                         one subplot - means vs residue index

            'gc_de2_el': PlotData of electrostatic <E1 - E2>_2,
                         one subplot - means vs residue index
        """

        plots = ODict()

        # all failed
        if not self.gcs:
            return plots

        lamb1, lamb2 = self._lambdas_A[0], self._lambdas_B[0]

        # make PlotData objects
        plots["gc_lra_el_top"] = PlotData("Top LRA GC (El, {}->{}, iscale={}),"
                                          " top 20".format(
                                              lamb1, lamb2,
                                              self._scale_ionized),
                                          xlabel="Residue",
                                          ylabel="Free energy  [kcal/mol]",
                                          plot_type="bar")

        plots["gc_reorg_el_top"] = PlotData(
            "Top REORG GC (El, {}->{}, iscale={}),"
            " top 20".format(lamb1, lamb2, self._scale_ionized),
            xlabel="Residue",
            ylabel="Free energy  [kcal/mol]",
            plot_type="bar")

        plots["gc_lra_el"] = PlotData("LRA GC (El, {}->{}, iscale={})"
                                      "".format(lamb1, lamb2,
                                                self._scale_ionized),
                                      xlabel="Residue index",
                                      ylabel="Energy  [kcal/mol]",
                                      plot_type="bar")

        plots["gc_lra_vdw"] = PlotData("LRA GC (VdW, {}->{})"
                                       "".format(lamb1, lamb2),
                                       xlabel="Residue index",
                                       ylabel="Energy  [kcal/mol]",
                                       plot_type="bar")

        plots["gc_reorg_el"] = PlotData("REORG GC (El, {}->{}, iscale={})"
                                        "".format(lamb1, lamb2,
                                                  self._scale_ionized),
                                        xlabel="Residue index",
                                        ylabel="Energy  [kcal/mol]",
                                        plot_type="bar")

        plots["gc_reorg_vdw"] = PlotData("REORG GC (VdW, {}->{})"
                                         "".format(lamb1, lamb2),
                                         xlabel="Residue index",
                                         ylabel="Energy  [kcal/mol]",
                                         plot_type="bar")

        plots["gc_de1_el"] = PlotData("<E1-E2>_1 (El, {}->{})"
                                      "".format(lamb1, lamb2),
                                      xlabel="Residue index",
                                      ylabel="Energy  [kcal/mol]",
                                      plot_type="bar")

        plots["gc_de1_vdw"] = PlotData("<E1-E2>_1 (VdW, {}->{})"
                                       "".format(lamb1, lamb2),
                                       xlabel="Residue index",
                                       ylabel="Energy  [kcal/mol]",
                                       plot_type="bar")

        plots["gc_de2_el"] = PlotData("<E1-E2>_2 (El, {}->{})"
                                      "".format(lamb1, lamb2),
                                      xlabel="Residue index",
                                      ylabel="Energy  [kcal/mol]",
                                      plot_type="bar")

        plots["gc_de2_vdw"] = PlotData("<E1-E2>_2 (VdW, {}->{})"
                                       "".format(lamb1, lamb2),
                                       xlabel="Residue index",
                                       ylabel="Energy  [kcal/mol]",
                                       plot_type="bar")

        cols = self.gcs_stats.get_columns()
        resids = cols[0]
        title = "mean_N={}".format(len(self.gcs))

        plots["gc_de1_vdw"].add_subplot(title, resids, cols[3], yerror=cols[4])
        plots["gc_de1_el"].add_subplot(title, resids, cols[5], yerror=cols[6])

        plots["gc_de2_vdw"].add_subplot(title, resids, cols[7], yerror=cols[8])
        plots["gc_de2_el"].add_subplot(title, resids, cols[9], yerror=cols[10])

        plots["gc_lra_vdw"].add_subplot(title,
                                        resids,
                                        cols[11],
                                        yerror=cols[12])
        plots["gc_lra_el"].add_subplot(title,
                                       resids,
                                       cols[13],
                                       yerror=cols[14])

        plots["gc_reorg_vdw"].add_subplot(title,
                                          resids,
                                          cols[15],
                                          yerror=cols[16])
        plots["gc_reorg_el"].add_subplot(title,
                                         resids,
                                         cols[17],
                                         yerror=cols[18])

        # top 20 LRA el
        sorted_rows = sorted(self.gcs_stats.get_rows(),
                             key=lambda x: -abs(x[5]))[:20]
        cols = zip(*sorted_rows)
        resids, resnames = cols[0], cols[1]
        keys = ["{}_{}".format(rn.capitalize(), ri) \
                                for ri, rn in zip(resids, resnames)]
        els, elstd = cols[13], cols[14]
        plots["gc_lra_el_top"].add_subplot(title, keys, els, yerror=elstd)

        # top 20 reorg el
        sorted_rows = sorted(self.gcs_stats.get_rows(),
                             key=lambda x: -abs(x[9]))[:20]
        cols = zip(*sorted_rows)
        resids, resnames = cols[0], cols[1]
        keys = ["{}_{}".format(rn.capitalize(), ri) \
                                for ri, rn in zip(resids, resnames)]
        els, elstd = cols[17], cols[18]
        plots["gc_reorg_el_top"].add_subplot(title, keys, els, yerror=elstd)

        return plots

    def get_pdbgc(self):
        """Return the structure in PDB format (string) with added GC values.

        Fill the Occupancy fields with LRA contributions and
        Temperature factor fields with REORG contributions
        (columns 13 and 17 of 'gcs_stats').

        Residues that have no entry in 'gcs_stats' get 0 in both fields.
        A "GAP" line is appended after the atoms of every molecule.

        Returns:
            pdb (string):  one "ATOM" line per atom, newline separated
        """

        try:
            resids, lras, reorgs = self.gcs_stats.get_columns([0, 13, 17])
        except IndexError:
            # NOTE(review): presumably raised when the stats container does
            # not have these columns (nothing was calculated) - confirm.
            # Every residue then falls back to zeros below.
            resids, lras, reorgs = [], [], []

        # Residue index -> (LRA, REORG), built once so that the lookup per
        # residue is constant time instead of a linear resids.index() scan.
        # setdefault() keeps the first entry of a repeated index, which is
        # what index() returned.
        gcs_by_resid = {}
        for resid, lra, reorg in zip(resids, lras, reorgs):
            gcs_by_resid.setdefault(resid, (lra, reorg))

        atom_fmt = ("ATOM  {:>5d} {:<4s} {:3s}  {:>4d}    "
                    "{:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}")

        pdb = []
        for mol in self._pdb_qstruct.molecules:
            for res in mol.residues:
                lra_gc, reorg_gc = gcs_by_resid.get(res.index, (0, 0))

                for atom in res.atoms:
                    x, y, z = atom.coordinates
                    pdb.append(atom_fmt.format(atom.index, atom.name,
                                               atom.residue.name,
                                               atom.residue.index,
                                               x, y, z, lra_gc, reorg_gc))
            pdb.append("GAP")
        return "\n".join(pdb)
Ejemplo n.º 20
0
class _QFepPart3(object):
    """Class for parsing and storing data from Part3 in Qfep output.

    Part3 contains the bin-averaged dGg values, points and squared
    eigenvectors from Part2.

    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    Args:
        part3_string (string):  string of Part3 in qfep output

    Usage:
    >>> cols = ["Lambda", "dGg"]
    >>> dGg_lambda = _QFepPart3.data.get_rows(columns=cols)


    """

    _PART3_HEADER = "# bin  energy gap  <dGg> <dGg norm> pts  <c1**2> "\
                   "<c2**2> <r_xy>"

    _COLUMN_TITLES = ["bin", "Egap", "dGg", "dGg_norm", "points", "c1**2",
                      "c2**2", "r_xy"]

    def __init__(self, part3_string):
        self._part3_string = part3_string
        self.data = DataContainer(self._COLUMN_TITLES)
        self._dga = None
        self._dg0 = None
        self._maxima_bins = None
        self._minima_bins = None
        self.warning = None

        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part3 is empty (no rows).")


    def _parse(self):
        lines = self._part3_string.split('\n')
        # the first line is a comment
        lines.pop(0)
        # comment with column names
        header = lines.pop(0).strip()
        if header != self._PART3_HEADER:
            raise QFepOutputError("Part3 has a wrong header, did the qfep "
                                  "binary change?")
        for line in lines:
            line = re.split("#|\!", line)[0].strip()
            if not line:
                continue
            row = [float(x) for x in line.split()]
            self.data.add_row(row)

    @property
    def dga(self):
        if self._dga == None:
            self._get_dgs()
        return self._dga

    @property
    def dg0(self):
        if self._dg0 == None:
            self._get_dgs()
        return self._dg0

    @property
    def minima_bins(self):
        if self._minima_bins == None:
            self._get_dgs()
        return self._minima_bins

    @property
    def maxima_bins(self):
        if self._maxima_bins == None:
            self._get_dgs()
        return self._maxima_bins

    def _get_dgs(self):
        # Get minima and maxima without any smoothing.
        # If there is more than one maxima and less or more than 2 minima,
        # raise an exception search for maxima only between 0.2*nbins and
        # 0.8*nbins (bad sampling on the edges can raise an error)
        # Also, save the bins of the minima.

        bins, des, dgs = self.data.get_columns(["bin", "Egap", "dGg_norm"])
        minima, maxima = [], []
        nbins = len(bins)
        for i in range(1, nbins-1):     # from the second to the second last

            dg, dgnext, dgprev = dgs[i], dgs[i+1], dgs[i-1]
            if dgprev >= dg and dg < dgnext:
                minima.append(i)
            elif dgprev <= dg and dg > dgnext and \
                                 i > nbins*0.2 and i < nbins*0.8:
                maxima.append(i)

        if len(minima) > 2 or len(maxima) > 1:
            # Bad sampling, more minima and maxima than wanted.
            # Get the highest maxima from those found so far.
            # Get the absolute minima to the left and to the right of this
            # maxima. Save the warning.
            max1 = max(maxima, key=lambda i: dgs[i])
            react = [(dgs[i], i) for i in minima if i < max1]
            prod = [(dgs[i], i) for i in minima if i > max1]
            try:
                min1 = min(react)[1]   # min() will return tuple with lowest dg
                min2 = min(prod)[1]
            except ValueError:
                # multiple minima on one side, none on the other
                # (starts/ends at the lowest point)
                raise QFepOutputError("Bad reaction free energy profile - "
                                      "reactants minima: {}, products minima: "
                                      "{}".format(len(react), len(prod)))

            self.warning = "Rough Free energy profile ({} minima and {} "\
                           "maxima found), look at the graphs!"\
                           "".format(len(minima), len(maxima))
            maxima = [max1,]
            minima = [min1, min2]

        if len(minima) != 2:
            raise QFepOutputError("Bad reaction free energy profile - {} "
                                  "local minima (instead of 2)"
                                  "".format(len(minima)))
        elif len(maxima) != 1:
            raise QFepOutputError("Bad reaction free energy profile - {} "
                                  "local maxima (instead of 1)"
                                  "".format(len(maxima)))

        self._dga = dgs[maxima[0]] - dgs[minima[0]]
        self._dg0 = dgs[minima[1]] - dgs[minima[0]]
        self._minima_bins = [bins[mini] for mini in minima]
        self._maxima_bins = [bins[maxi] for maxi in maxima]

        # adjust the values in data so that the reactants are zero
        colindex = self.data.column_titles.index("dGg_norm")
        for row in self.data.get_rows():
            row[colindex] = row[colindex] - dgs[minima[0]]
Ejemplo n.º 21
0
    def calc_lra(self, lambda_a, lambda_b):
        """Calculate LRA and reorganization energies between two states.

        LRA = 0.5*(<E2-E1>_10+<E2-E1>_01)
        REO = 0.5*(<E2-E1>_10-<E2-E1>_01)

        E1 == Potential energy of state A
        E2 == Potential energy of state B
        <>_10 == Configuration space A (lambda_a)
        <>_01 == Configuration space B (lambda_b)

        E2_10 == Potential energy of state B at lambda_a

        Args:
            lambda_a (float):  lambda value of first state, usually 1.0
            lambda_b (float):  lambda value of second state, usually 0.0

        Returns:
            lra (DataContainer):  LRA and reorganization energies,\
                                  as well as contributions from\
                                  individual states

        Raises:
            QFepOutputError:  number of EVB states is not two, or the
                              energies of either EVB state are missing at
                              lambda_a or lambda_b
        """

        if self._num_evb_states != 2:
            raise QFepOutputError("LRA works only with two states")

        lra = DataContainer(["E_type", "(E2-E1)_10", "(E2-E1)_01",
                             "LRA", "REORG"])

        e1_a, e1_b, e2_a, e2_b = None, None, None, None
        # get the appropriate rows of energies
        # note that these energies are not scaled by lambda
        # [4:] ignores 'file', 'state', 'points' and 'lambda'
        for row in self.data_state[0].get_rows():
            if abs(row[3] - lambda_a) < 1e-7:
                e1_a = row[4:]
            if abs(row[3] - lambda_b) < 1e-7:
                e1_b = row[4:]
        # lambda2 in data_state[1] is actually (1-lambda), correct for that
        for row in self.data_state[1].get_rows():
            if abs((1 - row[3]) - lambda_a) < 1e-7:
                e2_a = row[4:]
            if abs((1 - row[3]) - lambda_b) < 1e-7:
                e2_b = row[4:]

        # both states need a row at each lambda; a missing state-2 row
        # would otherwise fail later in zip() with a TypeError
        if not e1_a or not e2_a:
            raise QFepOutputError("LRA: No energy values for lambda == '{}'"
                                  "".format(lambda_a))
        if not e1_b or not e2_b:
            raise QFepOutputError("LRA: No energy values for lambda == '{}'"
                                  "".format(lambda_b))

        la, lb = lambda_a, lambda_b
        # calculate total E=(l1*E1 + l2*E2) energies
        e1_state1 = [la*e1a + (1-la)*e2a for e1a, e2a in zip(e1_a, e2_a)]
        e1_state2 = [la*e1b + (1-la)*e2b for e1b, e2b in zip(e1_b, e2_b)]

        e2_state1 = [lb*e1a + (1-lb)*e2a for e1a, e2a in zip(e1_a, e2_a)]
        e2_state2 = [lb*e1b + (1-lb)*e2b for e1b, e2b in zip(e1_b, e2_b)]

        # (E2-E1)_10    (reactant state) = First row E2 - E1
        # (E2-E1)_01    (products state) = Last row E2 - E1
        des_st1 = [e2 - e1 for e1, e2 in zip(e1_state1, e2_state1)]
        des_st2 = [e2 - e1 for e1, e2 in zip(e1_state2, e2_state2)]

        # LRA=0.5*(<E2-E1>_10+<E2-E1>_01)
        # REO=0.5*(<E2-E1>_10-<E2-E1>_01)
        des_st1_st2 = list(zip(des_st1, des_st2))
        es_lra = [0.5 * (de_st1 + de_st2) for de_st1, de_st2 in des_st1_st2]
        es_reo = [0.5 * (de_st1 - de_st2) for de_st1, de_st2 in des_st1_st2]

        # one output row per energy type (same columns that [4:] selected)
        e_types = self.data_state[0].column_titles[4:]

        for row in zip(e_types, des_st1, des_st2, es_lra, es_reo):
            lra.add_row(row)

        return lra
Ejemplo n.º 22
0
    def _parse(self):
        """Parse 'qcalc_output' and fill 'qcalc_version' and 'results'.

        Three sections of the output are read:
        - the list of calculations: one DataContainer ("Frame" + value
          column) is created in 'results' for every recognised calculation,
          keyed by the first field of its line
        - the table of results: its columns are copied to the containers
          created above, the row number is used as the frame
        - the average residue nonbond energies (if present): stored in
          results["gc"] with columns "Residue", "E_LJ" and "E_EL"

        Raises:
            QCalcError:  the output contains "ERROR" lines, or the list of
                         calculations / the results section is not found
        """
        # find the version
        try:
            self.qcalc_version = self._VERSION_RE.findall(self.qcalc_output)[0]
        except IndexError:
            self.qcalc_version = "Unknown, likely ancient"
        # look for errors
        err = "\n".join(re.findall("ERROR.*", self.qcalc_output))
        if err:
            raise QCalcError("Errors in qcalc output: {}".format(err))

        # parse the list of calculations
        calc_list = self._CALCLIST_RE.findall(self.qcalc_output)
        if not calc_list:
            raise QCalcError("Failed to parse qcalc output")

        # (text in the description, title of the value column),
        # the first match wins
        # TODO: extract the energy as well (bond/qbond, angle, torsion)
        known_calcs = (
            ("Root Mean Square Deviation", "RMSD"),
            ("distance between", "distance"),
            ("distance, bond energy between", "distance"),
            ("distance, qbond energy between", "distance"),
            ("angle between", "angle"),
            ("angle, angle energy between", "angle"),
            ("angle, qangle energy between", "angle"),
            ("torsion between", "torsion"),
            ("torsion, torsion energy between", "torsion"),
            ("torsion, qtorsion energy between", "torsion"),
        )

        for line in calc_list[0].split("\n"):
            lf = line.split()
            if not lf:
                # blank line, there is no calculation index to read
                continue
            calc_i = lf[0]
            for description, coltitle in known_calcs:
                if description in line:
                    self.results[calc_i] = DataContainer(["Frame", coltitle])
                    break
            else:
                # residue nonbond monitors are parsed separately below
                if "nonbond monitor for residues" not in line:
                    logger.warning("Ignoring unknown QCalc5 results: {}"
                                   "".format(line))

        # parse the normal results (distances, rmsds)
        res_list = self._RES_RE.findall(self.qcalc_output)
        if not res_list:
            raise QCalcError("Failed to parse qcalc output")

        # skip first row (--- Calculation results ---)
        res_list = res_list[0].split("\n")[1:]
        colheaders = res_list.pop(0)

        coltitles = []
        colheaders = colheaders.replace(": ", ":")  #fix
        for colheader in colheaders.split():
            if ":" in colheader:
                colheader, calctype = colheader.split(":")
                if not calctype:
                    continue  # residue nonbond calc
            coltitles.append(colheader)

        # The table exists only if there is at least one column and one
        # row; without it the containers in 'results' are left empty
        # (the loop used to fail on the undefined 'tmpdata').
        if coltitles and res_list:
            tmpdata = DataContainer(coltitles)
            for line in res_list:
                lf = line.split()
                if not lf:
                    continue
                tmpdata.add_row(lf)

            for k, datac in self.results.items():
                values = zip(*tmpdata.get_columns(columns=[k, ]))
                for i, v in enumerate(values):
                    datac.add_row((i, float(v[0])))

        # parse the average residue nonbond energies (if they exist)
        res_resnb = self._RESNB_RE.findall(self.qcalc_output)
        if res_resnb:
            self.results["gc"] = DataContainer(["Residue", "E_LJ", "E_EL"])
            # skip two lines
            # TODO: extract qatoms indexes?
            res_resnb = res_resnb[0].split("\n")[2:]
            for line in res_resnb:
                lf = line.split()
                if lf:
                    resid, elj, eel = int(lf[0]), float(lf[1]), float(lf[2])
                    self.results["gc"].add_row((resid, elj, eel))
Ejemplo n.º 23
0
    def __init__(self, logfile, timeunit="ps", stepsize=None, starttime=0):
        """
        Parses a Q dynamics logfile and extracts data (temperature, energies...)
        For interfacing, use QAnalyseDyns.

        The header (everything up to the "Initialising dynamics" line) is
        read first and searched with regular expressions, then the dynamics
        part is parsed by _parse_dyn().

        Args:
           logfile (string):  path/filename of Q logfile
           timeunit (string):  fs,ps,ns (optional, default is ps)
           stepsize (float):  in case the one in Q is 0.000 (Q printout is a work of art)
           starttime (float):  offset added to the end time of the
                               simulation (optional, default is 0)

        Raises:
           QAnalyseDynError:  unknown timeunit, unreadable logfile, missing
                              header entries (version, topology, number of
                              steps, stepsize, number of states), stepsize
                              of 0.0 in the logfile with no 'stepsize'
                              given, or 'stepsize' given while the logfile
                              has a non-zero one

        Usage looks like this:

            # parse
            qad = QAnalyseDyns(.....).analysed[0]

            # print out nicely formatted temperature stats
            print(qad.get_temp_stats())

            # get averages for second half (time >= 50%) of all the
            # temperatures
            temps = qad.data_temp.get_columns()

            coltitles = qad.data_temp.column_titles
            # [ "Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent" ]

            midpoint = int(temps[0][-1]) / 2   # 0 == "Time", -1 == last frame
            for i, colt in enumerate(coltitles[1:], 1):
                print(colt, np.mean([x for j, x in enumerate(temps[i])
                                     if temps[0][j] >= midpoint]))

            # get the potential energy data and just print it out
            Epot = qad.data_E_SUM.get_columns(["Time", "Potential"])
            print(Epot)

        """

        self._logfile = logfile
        self._starttime = starttime

        # conversion factors from fs (unit of the stepsize in the logfile)
        self.MAP_TIME = {"fs": 1.0, "ps": 1e-3, "ns": 1e-6}
        if timeunit not in self.MAP_TIME:
            raise QAnalyseDynError(
                "Timeunit has to be either 'fs', 'ps' or 'ns'")
        self._timeconv = self.MAP_TIME[timeunit]

        # read only the header, the rest of the file is left to _parse_dyn()
        header_lines = []
        try:
            with open(self._logfile, 'r') as lf:
                for line in lf:
                    header_lines.append(line)
                    if "Initialising dynamics" in line:
                        break
        except IOError as e:
            raise QAnalyseDynError("Could not read the logfile: " + str(e))
        self._header = "".join(header_lines)

        # use RE to get some info about the simulations
        m = re.search(r"Build number\s*([\d\.]+)", self._header)
        if m:
            self._qversion = m.group(1)
        elif re.search('QDyn version 5.06', self._header):
            self._qversion = '5.06'
        else:
            raise QAnalyseDynError("Not a valid Q log file or Q version "
                                   "is very old...")

        m = re.search(r"Topology file      =\s*(\S+)", self._header)
        if not m:
            raise QAnalyseDynError("Couldn't find the topology filename!?")
        self._topfile = m.group(1)

        m = re.search(r"Number of MD steps =\s*(\d+)", self._header)
        if not m:
            raise QAnalyseDynError("Couldn't find number of steps!?")
        self._md_steps = int(m.group(1))

        m = re.search(r"Stepsize \(fs\)    =\s*([\d\.]+)", self._header)
        if not m:
            raise QAnalyseDynError("Couldn't find the stepsize!?")
        self._stepsize = float(m.group(1))

        if not stepsize:
            if abs(self._stepsize - 0.0) < 1e-8:
                raise QAnalyseDynError("Can't convert steps to time, stepsize "
                                       "is 0.0 in the logfile (Q sucks). Set "
                                       "the stepsize please.")
        else:
            # a user-defined stepsize is accepted only as a replacement
            # for a zero stepsize in the logfile
            if self._stepsize:
                raise QAnalyseDynError("Will not override the non-zero "
                                       "stepsize in the logfile...")
            else:
                self._stepsize = stepsize

        m = re.search(r"FEP input file     =\s*(\S+)", self._header)
        self._fepfile = m.group(1) if m else None

        # without a FEP file there are no EVB states (and no Q energy
        # containers are created below)
        self._evb_states = 0
        if self._fepfile:
            m = re.search(r"No. of fep/evb states    =\s*(\d+)", self._header)
            if not m:
                raise QAnalyseDynError("Couldn't find the number of states!?")
            self._evb_states = int(m.group(1))

        # the off-diagonal section is missing in logfiles without one,
        # treat that the same as having no off-diagonals
        m = re.search(r"(No. of offdiagonal \(Hij\) functions =.*?^$)",
                      self._header, re.MULTILINE | re.DOTALL)
        offdsection = m.group(1) if m else ""
        offdgs = re.findall(
            r"\s+\d+\s+\d+\s+(\d+)\s+(\d+)\s+[\d\.]+\s+[\d\.]+", offdsection)

        #
        # make datacontainer variables for storing all the data

        # offdiags, one temporary container per pair of atoms
        offdiags = ["{}_{}".format(a1, a2) for a1, a2 in offdgs]
        self._tmp_offdiags = {}
        for k in offdiags:
            self._tmp_offdiags[k] = DataContainer(["Time", "Distance"])

        # temperature
        self.data_temp = DataContainer(
            ["Time", "T_tot", "T_free", "T_free_solute", "T_free_solvent"])

        # energies
        self.data_E_solute = DataContainer(
            ["Time", "El", "VdW", "Bond", "Angle", "Torsion", "Improper"])
        self.data_E_solvent = DataContainer(
            ["Time", "El", "VdW", "Bond", "Angle", "Torsion", "Improper"])
        self.data_E_solute_solvent = DataContainer(["Time", "El", "VdW"])
        self.data_E_LRF = DataContainer(["Time", "El"])
        self.data_E_Q_atom = DataContainer(
            ["Time", "El", "VdW", "Bond", "Angle", "Torsion", "Improper"])
        self.data_E_restraints = DataContainer([
            "Time", "Total", "Fix", "Solvent_rad", "Solvent_pol", "Shell",
            "Solute"
        ])
        self.data_E_SUM = DataContainer(
            ["Time", "Total", "Potential", "Kinetic"])

        # Q energies, one container per EVB state in each list
        q_columns1 = ("Time", "Lambda", "El", "VdW")
        q_columns2 = ("Time", "Lambda", "El", "VdW", "Bond", "Angle",
                      "Torsion", "Improper")
        q_columns3 = ("Time", "Lambda", "Total", "Restraint")

        self.data_EQ_Q, self.data_EQ_prot = [], []
        self.data_EQ_wat, self.data_EQ_surr = [], []
        self.data_EQ_any, self.data_EQ_SUM = [], []
        for _ in range(self._evb_states):
            self.data_EQ_Q.append(DataContainer(q_columns1))
            self.data_EQ_prot.append(DataContainer(q_columns1))
            self.data_EQ_wat.append(DataContainer(q_columns1))
            self.data_EQ_surr.append(DataContainer(q_columns1))
            self.data_EQ_any.append(DataContainer(q_columns2))
            self.data_EQ_SUM.append(DataContainer(q_columns3))

        # mapping of energy types (label in the output) with containers
        self.map_en_section = {
            "solute": self.data_E_solute,
            "solvent": self.data_E_solvent,
            "solute-solvent": self.data_E_solute_solvent,
            "LRF": self.data_E_LRF,
            "Q-atom": self.data_E_Q_atom,
            "SUM": self.data_E_SUM
        }

        self.map_qen_section = {
            "Q-Q": self.data_EQ_Q,
            "Q-prot": self.data_EQ_prot,
            "Q-wat": self.data_EQ_wat,
            "Q-surr.": self.data_EQ_surr,
            "Q-any": self.data_EQ_any,
            "Q-SUM": self.data_EQ_SUM
        }

        self._parse_dyn()

        # Merge the temporary off-diagonal containers into one table:
        # "Time" (taken from the first container) followed by one distance
        # column per off-diagonal. Keys and containers are put in lists so
        # that they can be indexed/concatenated on Python 3 as well.
        offd_keys = list(self._tmp_offdiags.keys())
        offd_dcs = [self._tmp_offdiags[k] for k in offd_keys]
        self.data_offdiags = DataContainer(["Time"] + offd_keys)
        if offd_dcs:
            times = offd_dcs[0].get_columns([0])[0]
            distances = [d_dc.get_columns([1])[0] for d_dc in offd_dcs]
            for d_row in zip(times, *distances):
                self.data_offdiags.add_row(d_row)

        self._endtime = self._md_steps * self._stepsize * self._timeconv \
                      + self._starttime