class _QFepPart1(object):
    """Class for parsing and storing data from Part1 in Qfep output.

    Part1 contains free energies vs. lambda (FEP).
    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    Usage:
    cols=["Lambda", "dG"]
    dG_lambda = _QFepPart1.data.get_rows(columns=cols)

    Args:
        part1_string (string): string of Part1 in qfep output
    """

    # expected column-header comment line (sanity check of qfep version)
    _PART1_HEADER = "# lambda(1) dGf sum(dGf) dGr sum(dGr) <dG>"
    _COLUMN_TITLES = ["Lambda", "dGf", "sum_dGf", "dGr", "sum_dGr", "dG"]

    def __init__(self, part1_string):
        self._part1_string = part1_string
        self.data = DataContainer(self._COLUMN_TITLES)
        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part1 is empty (no rows).")

    @property
    def dg(self):
        """Return final dG(lambda) (FEP)"""
        return self.data.get_columns(["dG"])[0][-1]

    def _parse(self):
        """Populate self.data from the raw Part1 text.

        Raises:
            QFepOutputError: if the header does not match _PART1_HEADER
        """
        lines = self._part1_string.split('\n')
        # the first line is a comment
        lines.pop(0)
        ## In newer versions of Q, two additional lines are printed
        # to distinguish between 'full', 'exclusions' and 'qcp'
        # check for the two extra lines and remove them
        # (length-guard avoids an unhandled IndexError on truncated input)
        if len(lines) > 1 and "Calculation" in lines[1]:
            lines = lines[2:]
        # comment with column names
        header = lines.pop(0).strip()
        if header != self._PART1_HEADER:
            raise QFepOutputError("Part1 has a wrong header, did the qfep5 "
                                  "binary change?")
        for line in lines:
            # strip trailing '#'/'!' comments
            # (raw string: '\!' is an invalid escape sequence in Python 3)
            line = re.split(r"#|!", line)[0].strip()
            if not line:
                continue
            row = [float(x) for x in line.split()]
            self.data.add_row(row)
def _parse(self):
    """Parse the stored qcalc output (self.qcalc_output).

    Extracts the qcalc version, aborts on qcalc-reported errors,
    registers one DataContainer per requested calculation in
    self.results, fills them with per-frame values, and finally adds
    the average residue nonbond energies under the 'gc' key (if
    present in the output).

    Raises:
        QCalcError: on 'ERROR...' lines in the output or when the
                    calculation list / results section can't be found.
    """
    # find the version
    try:
        self.qcalc_version = self._VERSION_RE.findall(self.qcalc_output)[0]
    except IndexError:
        # version line absent in very old qcalc builds
        self.qcalc_version = "Unknown, likely ancient"

    # look for errors
    err = "\n".join(re.findall("ERROR.*", self.qcalc_output))
    if err:
        raise QCalcError("Errors in qcalc output: {}".format(err))

    # parse the list of calculations
    calc_list = self._CALCLIST_RE.findall(self.qcalc_output)
    if not calc_list:
        raise QCalcError("Failed to parse qcalc output")

    # register a DataContainer per calculation, keyed by the calc index
    # (first whitespace-separated token of each calc-list line)
    for line in calc_list[0].split("\n"):
        lf = line.split()
        calc_i = lf[0]
        if "Root Mean Square Deviation" in line:
            self.results[calc_i] = DataContainer(["Frame", "RMSD"])
        elif "distance between" in line:
            self.results[calc_i] = DataContainer(["Frame", "distance"])
        # TODO: extract the energy as well
        elif "distance, bond energy between" in line:
            self.results[calc_i] = DataContainer(["Frame", "distance"])
        # TODO: extract the energy as well
        elif "distance, qbond energy between" in line:
            self.results[calc_i] = DataContainer(["Frame", "distance"])
        elif "angle between" in line:
            self.results[calc_i] = DataContainer(["Frame", "angle"])
        elif "angle, angle energy between" in line:
            self.results[calc_i] = DataContainer(["Frame", "angle"])
        elif "angle, qangle energy between" in line:
            self.results[calc_i] = DataContainer(["Frame", "angle"])
        elif "torsion between" in line:
            self.results[calc_i] = DataContainer(["Frame", "torsion"])
        elif "torsion, torsion energy between" in line:
            self.results[calc_i] = DataContainer(["Frame", "torsion"])
        elif "torsion, qtorsion energy between" in line:
            self.results[calc_i] = DataContainer(["Frame", "torsion"])
        elif "nonbond monitor for residues" in line:
            # handled separately below via _RESNB_RE
            pass
        else:
            logger.warning("Ignoring unknown QCalc5 results: {}"
                           "".format(line))

    # parse the normal results (distances, rmsds)
    res_list = self._RES_RE.findall(self.qcalc_output)
    if not res_list:
        raise QCalcError("Failed to parse qcalc output")

    # skip first row (--- Calculation results ---)
    res_list = res_list[0].split("\n")[1:]
    colheaders = res_list.pop(0)
    coltitles = []
    colheaders = colheaders.replace(": ", ":")  #fix
    for colheader in colheaders.split():
        if ":" in colheader:
            colheader, calctype = colheader.split(":")
            if not calctype:
                continue  # residue nonbond calc
        coltitles.append(colheader)

    if coltitles and res_list:
        # stage the whole table, then copy each calc's column into its
        # registered DataContainer as (frame_index, value) rows
        tmpdata = DataContainer(coltitles)
        for line in res_list:
            lf = line.split()
            if not lf:
                continue
            tmpdata.add_row(lf)

        for k, datac in self.results.items():
            for i, v in enumerate(zip(*tmpdata.get_columns(columns=[
                    k, ]))):
                datac.add_row((i, float(v[0])))

    # parse the average residue nonbond energies (if they exist)
    res_resnb = self._RESNB_RE.findall(self.qcalc_output)
    if res_resnb:
        self.results["gc"] = DataContainer(["Residue", "E_LJ", "E_EL"])
        # skip two lines
        # TODO: extract qatoms indexes?
        res_resnb = res_resnb[0].split("\n")[2:]
        for line in res_resnb:
            lf = line.split()
            if lf:
                resid, elj, eel = int(lf[0]), float(lf[1]), float(lf[2])
                self.results["gc"].add_row((resid, elj, eel))
class QGroupContrib(object):
    """Class for calculating LRA group contributions from EVB trajectories.

    Args:
        qcalc_exec (string): path of qcalc executable
        calcdirs (list of strings): list of directories
        pdb_file (string): PDB created with qprep
        en_list_fn (string): energy-files-list filename
        lambdas_A (tuple of floats): lambdas of state A (1.0, 0.0)
        lambdas_B (tuple of floats): lambdas of state B (0.5, 0.5)
        resid_first (int): index of first residue used for calcs
        resid_last (int): index of last residue used for calcs
        scale_ionized (float): scale down ionized residues (ARG, LYS, \
                               HIP, GLU, ASP) by this factor \
                               (see doi:10.1021/jp962478o)
        nthreads (int): number of threads
        qmask (list of ints): list of atom indexes to be used as the\
                              Q mask for the GC calculations. By default,\
                              this is obtained from the FEP file.
    """

    def __init__(self, qcalc_exec, calcdirs, pdb_file, en_list_fn,
                 lambdas_A, lambdas_B, resid_first, resid_last,
                 scale_ionized, nthreads, qmask=None):

        self._en_list_fn = en_list_fn
        self._qcalc_exec = qcalc_exec
        try:
            self._pdb_qstruct = QStruct(pdb_file, "pdb")
        except QStructError as error_msg:
            raise QGroupContribError("Can't parse PDB file '{}': {}"
                                     "".format(pdb_file, error_msg))
        self._calcdirs = [os.path.relpath(cd) for cd in calcdirs]
        self._nthreads = nthreads
        self._lambdas_A = lambdas_A
        self._lambdas_B = lambdas_B
        self._resid_first = resid_first
        self._resid_last = resid_last
        self._scale_ionized = scale_ionized
        self._qmask = qmask
        # per-calcdir (inputs, outputs) from qcalc runs
        self._qcalc_io = ODict()
        # per-calcdir DataContainer with LRA/REORG group contributions
        self.gcs = ODict()
        # per-calcdir error message for failed runs
        self.failed = ODict()
        self.qcalc_version = None
        # set externally to abort running worker threads
        self.kill_event = threading.Event()

        lambda1_st1, lambda2_st1 = lambdas_A[0], lambdas_B[0]
        sci = self._scale_ionized
        # column layout referenced by index in plotdata/get_pdbgc:
        # 0 resid, 1 resname, 2 N, 3-4 <E2-E1>1 VdW, 5-6 <E2-E1>1 El,
        # 7-8 <E2-E1>2 VdW, 9-10 <E2-E1>2 El, 11-12 LRA VdW, 13-14 LRA El,
        # 15-16 REORG VdW, 17-18 REORG El (mean/stdev pairs)
        colnames = ["Residue id",
                    "Residue name",
                    "N",
                    "<E2-E1>1_VdW(l={:5.4f}->l={:5.4f})_mean"
                    "".format(lambda1_st1, lambda2_st1),
                    "<E2-E1>1_VdW(l={:5.4f}->l={:5.4f})_stdev"
                    "".format(lambda1_st1, lambda2_st1),
                    "<E2-E1>1_El(l={:5.4f}->l={:5.4f})_mean"
                    "".format(lambda1_st1, lambda2_st1),
                    "<E2-E1>1_El(l={:5.4f}->l={:5.4f})_stdev"
                    "".format(lambda1_st1, lambda2_st1),
                    "<E2-E1>2_VdW(l={:5.4f}->l={:5.4f})_mean"
                    "".format(lambda1_st1, lambda2_st1),
                    "<E2-E1>2_VdW(l={:5.4f}->l={:5.4f})_stdev"
                    "".format(lambda1_st1, lambda2_st1),
                    "<E2-E1>2_El(l={:5.4f}->l={:5.4f})_mean"
                    "".format(lambda1_st1, lambda2_st1),
                    "<E2-E1>2_El(l={:5.4f}->l={:5.4f})_stdev"
                    "".format(lambda1_st1, lambda2_st1),
                    "LRA_VdW(l={:5.4f}->l={:5.4f})_mean"
                    "".format(lambda1_st1, lambda2_st1),
                    "LRA_VdW(l={:5.4f}->l={:5.4f})_stdev"
                    "".format(lambda1_st1, lambda2_st1),
                    "LRA_El(l={:5.4f}->l={:5.4f})_(iscale={})_mean"
                    "".format(lambda1_st1, lambda2_st1, sci),
                    "LRA_El(l={:5.4f}->l={:5.4f})_(iscale={})_stdev"
                    "".format(lambda1_st1, lambda2_st1, sci),
                    "REORG_VdW(l={:5.4f}->l={:5.4f})_mean"
                    "".format(lambda1_st1, lambda2_st1),
                    "REORG_VdW(l={:5.4f}->l={:5.4f})_stdev"
                    "".format(lambda1_st1, lambda2_st1),
                    "REORG_El(l={:5.4f}->l={:5.4f})_(iscale={})_mean"
                    "".format(lambda1_st1, lambda2_st1, sci),
                    "REORG_El(l={:5.4f}->l={:5.4f})_(iscale={})_stdev"
                    "".format(lambda1_st1, lambda2_st1, sci)]
        self.gcs_stats = DataContainer(colnames)

    def calcall(self):
        """Run the GC calcs, update .gcs, .failed and .gcs_stats.
        """
        # one worker thread per calcdir, throttled by the semaphore
        semaphore = threading.BoundedSemaphore(self._nthreads)
        self._qcalc_io.clear()
        self.gcs.clear()
        self.gcs_stats.delete_rows()
        self.failed.clear()
        threads = []
        for calcdir in self._calcdirs:
            threads.append(_QGroupContribThread(self, semaphore, calcdir))
            threads[-1].start()

        for t in threads:
            while t.isAlive():
                t.join(1.0)
                if self.kill_event.is_set():
                    # best-effort kill of the running qcalc subprocess;
                    # ignore errors (process may already be gone)
                    try:
                        t.qcalc.process.terminate()
                    except Exception as e:
                        pass
                    return
            if t.error:
                self.failed[t.calcdir] = t.error
            else:
                self._qcalc_io[t.calcdir] = (t.qinps, t.qouts)

        # parse the output for results and
        # calculate LRAs for each dir
        for _dir, (_, qouts) in self._qcalc_io.iteritems():
            # qouts order matches combs in _calcsingle:
            # gcs[0]=E1_conf1, gcs[1]=E2_conf1, gcs[2]=E1_conf2, gcs[3]=E2_conf2
            gcs = []
            failed_flag = False
            for qout in qouts:
                try:
                    qco = QCalcOutput(qout)
                    res = qco.results["gc"]
                    if not self.qcalc_version:
                        self.qcalc_version = qco.qcalc_version
                except (QCalcError, KeyError) as error_msg:
                    self.failed[_dir] = error_msg
                    failed_flag = True
                    break

                # map residue id -> {"vdw": ..., "el": ...}
                gc = {}
                for row in res.get_rows():
                    resid, vdw, el = int(row[0]), float(row[1]), float(row[2])
                    gc[resid] = {"vdw": vdw, "el": el}
                gcs.append(gc)
            if failed_flag:
                continue

            resids = sorted(gcs[0].keys())
            resnames = [self._pdb_qstruct.residues[ri - 1].name
                        for ri in resids]

            # do the LRA thingy
            # LRA = 0.5*(<E2-E1>_conf1+<E2-E1>_conf2)
            # REORG = <E2-E1>_conf1 - LRA
            e2e1_st1_vdw = [gcs[1][key]["vdw"] - gcs[0][key]["vdw"]
                            for key in resids]
            e2e1_st1_el = [gcs[1][key]["el"] - gcs[0][key]["el"]
                           for key in resids]
            e2e1_st2_vdw = [gcs[3][key]["vdw"] - gcs[2][key]["vdw"]
                            for key in resids]
            e2e1_st2_el = [gcs[3][key]["el"] - gcs[2][key]["el"]
                           for key in resids]

            # super efficient stuff here
            vdw_lra = [0.5 * (a + b) for a, b
                       in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
            el_lra = [0.5 * (a + b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)]
            vdw_reorg = [0.5 * (a - b) for a, b
                         in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
            el_reorg = [0.5 * (a - b) for a, b
                        in zip(e2e1_st1_el, e2e1_st2_el)]

            # scale the ionized residues
            # (only electrostatics are scaled, see class docstring)
            if abs(self._scale_ionized - 1.0) > 1e-7:
                for i, resname in enumerate(resnames):
                    if resname in ("ARG", "LYS", "HIP", "ASP", "GLU"):
                        e2e1_st1_el[i] = e2e1_st1_el[i] / self._scale_ionized
                        e2e1_st2_el[i] = e2e1_st2_el[i] / self._scale_ionized
                        el_lra[i] = el_lra[i] / self._scale_ionized
                        el_reorg[i] = el_reorg[i] / self._scale_ionized

            # write the DataContainer
            lambda1_st1 = self._lambdas_A[0]
            lambda2_st1 = self._lambdas_B[0]
            gc_lra = DataContainer(["Residue_id",
                                    "Residue name",
                                    "<E2-E1>1_VdW(l={:5.4f}->l={:5.4f})"
                                    "".format(lambda1_st1, lambda2_st1),
                                    "<E2-E1>1_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                    "".format(lambda1_st1, lambda2_st1,
                                              self._scale_ionized),
                                    "<E2-E1>2_VdW(l={:5.4f}->l={:5.4f})"
                                    "".format(lambda1_st1, lambda2_st1),
                                    "<E2-E1>2_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                    "".format(lambda1_st1, lambda2_st1,
                                              self._scale_ionized),
                                    "LRA_VdW(l={:5.4f}->l={:5.4f})"
                                    "".format(lambda1_st1, lambda2_st1),
                                    "LRA_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                    "".format(lambda1_st1, lambda2_st1,
                                              self._scale_ionized),
                                    "REORG_VdW(l={:5.4f}->l={:5.4f})"
                                    "".format(lambda1_st1, lambda2_st1),
                                    "REORG_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                    "".format(lambda1_st1, lambda2_st1,
                                              self._scale_ionized)])
            for row in zip(resids, resnames,
                           e2e1_st1_vdw, e2e1_st1_el,
                           e2e1_st2_vdw, e2e1_st2_el,
                           vdw_lra, el_lra, vdw_reorg, el_reorg):
                gc_lra.add_row(row)
            self.gcs[_dir] = gc_lra

        # get GC stats over all directories
        self.gcs_stats.delete_rows()
        # collect per-residue value lists across all dirs:
        # res_key "resid.resname" -> list of 8 lists (one per GC column)
        gcs = {}
        for _, gc in self.gcs.iteritems():
            for row in gc.get_rows():
                resid, resname = row[0:2]
                res_key = "{}.{}".format(resid, resname)
                values = [[val, ] for val in row[2:]]
                if not gcs.has_key(res_key):
                    gcs[res_key] = values
                else:
                    for i, val in enumerate(gcs[res_key]):
                        val.extend(values[i])

        # iterate through each residue and calculate
        # means and stdevs
        # (sort by residue index)
        for res_key in sorted(gcs.keys(),
                              key=lambda x: int(x.split(".")[0])):
            rc = gcs[res_key]
            resid, resname = res_key.split(".")
            # get mean and stdev
            rc_stats = [int(resid), resname, len(rc[0]),
                        np.mean(rc[0]), np.std(rc[0]),  # <E2-E1>1 vdw
                        np.mean(rc[1]), np.std(rc[1]),  # <E2-E1>1 el
                        np.mean(rc[2]), np.std(rc[2]),  # <E2-E1>2 vdw
                        np.mean(rc[3]), np.std(rc[3]),  # <E2-E1>2 el
                        np.mean(rc[4]), np.std(rc[4]),  # LRA vdw
                        np.mean(rc[5]), np.std(rc[5]),  # LRA el
                        np.mean(rc[6]), np.std(rc[6]),  # REORG vdw
                        np.mean(rc[7]), np.std(rc[7])]  # REORG el
            self.gcs_stats.add_row(rc_stats)

    def _calcsingle(self, calcdir, qcalc):
        # find input files with given lambdas
        # (and correct energy files)
        # extract information and run qcalc for each combination
        #
        # fep_000_1.000.dcd, "1.00 0.00"
        # fep_000_1.000.dcd, "0.00 0.00"
        # fep_050_0.000.dcd, "1.00 0.00"
        # fep_050_0.000.dcd, "0.00 0.00"
        #
        # return input output strings as a tuple of lists of strings
        # ( [inp1, inp2, inp3, inp4], [out1, out2, out3, out4] )
        # or raise QGroupContribError on failure

        # get the list of energy-files
        try:
            en_list_fn = os.path.join(calcdir, self._en_list_fn)
            en_list_fn_str = open(en_list_fn, 'r').read()
        except IOError:
            raise QGroupContribError("No energy-files list '{}'."
                                     "".format(self._en_list_fn))
        en_list = [enf for enf in en_list_fn_str.split("\n") \
                   if enf.strip() != ""]
        if not en_list:
            raise QGroupContribError("No energy files in '{}'."
                                     "".format(self._en_list_fn))

        # parse all input files in calcdir for
        # a valid energy file and lambda values
        inp_fns = [inp for inp in os.listdir(calcdir) if inp.endswith(".inp")]
        lambda_inp_map = {}
        for inp in inp_fns:
            try:
                inp_file = os.path.join(calcdir, inp)
                qdi = QDynInput(input_string=open(inp_file, "r").read())
            except (IOError, QDynInputError) as error_msg:
                logger.debug("Error reading Q input '{}': {}"
                             "".format(inp, error_msg))
                continue
            try:
                lambda_st1 = float(qdi.parameters["lambdas"].split()[0])
                en_file = qdi.parameters["files"]["energy"]
            except KeyError:
                logger.debug("Input '{}' missing lambda or energy file"
                             "".format(inp))
                continue
            if en_file not in en_list:
                continue
            # key on state-1 lambda; duplicates are ambiguous -> error
            lambda_key = "{:.6f}".format(lambda_st1)
            try:
                inp2 = lambda_inp_map[lambda_key][0]
            except KeyError:
                lambda_inp_map[lambda_key] = (inp, qdi)
            else:
                raise QGroupContribError("Same lambda values in Qdyn "
                                         "inputs: '{}', '{}' ??"
                                         "".format(inp, inp2))

        # get inputs that match specified state1 lambda values
        lambdas_st1 = (self._lambdas_A[0], self._lambdas_B[0])
        try:
            inputs = []
            for lamb_st1 in lambdas_st1:
                lamb_key = "{:.6f}".format(lamb_st1)
                inputs.append(lambda_inp_map[lamb_key])
        except KeyError:
            raise QGroupContribError("QDyn input with lambda=='{}' "
                                     "(and energy file in '{}') not found."
                                     "".format(lamb_st1, en_list_fn))

        # get topology, fep and trajectory filenames from the inputs
        # (all inputs must share the same topology and fep file)
        top_fn, fep_fn, dcd_fns = None, None, []
        for inp, qdi in inputs:
            try:
                tmp_top_fn = qdi.parameters["files"]["topology"]
            except KeyError:
                raise QGroupContribError("Topology not found in Qdyn "
                                         "input '{}'.".format(inp))
            if top_fn and top_fn != tmp_top_fn:
                raise QGroupContribError("Qdyn inputs with different "
                                         "topologies: '{}', '{}' ??"
                                         "".format(top_fn, tmp_top_fn))
            try:
                tmp_fep_fn = qdi.parameters["files"]["fep"]
            except KeyError:
                raise QGroupContribError("Fep file not found in Qdyn "
                                         "input '{}'.".format(inp))
            if fep_fn and fep_fn != tmp_fep_fn:
                raise QGroupContribError("Qdyn inputs with different "
                                         "fep files: '{}', '{}' ??"
                                         "".format(fep_fn, tmp_fep_fn))
            try:
                tmp_dcd_fn = qdi.parameters["files"]["trajectory"]
            except KeyError:
                raise QGroupContribError("Trajectory file not found in Qdyn "
                                         "input '{}'.".format(inp))
            top_fn = tmp_top_fn
            fep_fn = tmp_fep_fn
            dcd_fns.append(tmp_dcd_fn)

        # check if files are missing
        for fn in [top_fn, fep_fn] + dcd_fns:
            if not os.path.lexists(os.path.join(calcdir, fn)):
                raise QGroupContribError("Missing file: {}".format(fn))

        if not self._qmask:
            # parse fep for q atom numbers
            # (second column of the [atoms] section)
            with open(os.path.join(calcdir, fep_fn), "r") as fep:
                section = ""
                q_atoms = []
                for line in fep.readlines():
                    line = line.split("#")[0].split("!")[0].strip()
                    if line == "":
                        continue
                    elif line[0] == "[":
                        section = line
                    elif section == "[atoms]":
                        q_atoms.append(line.split()[1])
        else:
            q_atoms = self._qmask

        # single-atom masks: "i i" ranges
        masks = ["{} {}".format(ai, ai) for ai in q_atoms]

        # make qcalc inputs for every combination of
        # configuration (dcd) and potential (lambda),
        # run them and return the inputs and outputs
        combs = ((dcd_fns[0], self._lambdas_A),  # E1_conf1
                 (dcd_fns[0], self._lambdas_B),  # E2_conf1
                 (dcd_fns[1], self._lambdas_A),  # E1_conf2
                 (dcd_fns[1], self._lambdas_B))  # E2_conf2
        # example with lambdas "1.00 0.00" and "0.50 0.50":
        #
        # fep_000_1.000.dcd, (1.00, 0.00)
        # fep_000_1.000.dcd, (0.50, 0.50)
        # fep_025_0.500.dcd, (1.00, 0.00)
        # fep_025_0.500.dcd, (0.50, 0.50)

        input_strings = []
        output_strings = []
        for dcdfile, lambdas in combs:
            qci = QCalcInput(top_fn, [dcdfile, ], fep_fn, lambdas)
            qci.add_residue_nb_mon(self._resid_first, self._resid_last,
                                   masks)
            qcalc_inp_str = qci.get_string()
            try:
                qcalc_out_str = qcalc.run(qcalc_inp_str, workdir=calcdir)
            except QCalcError as error_msg:
                raise QGroupContribError(error_msg)
            input_strings.append(qcalc_inp_str)
            output_strings.append(qcalc_out_str)

        return (input_strings, output_strings)

    @property
    def details(self):
        """Human-readable summary (version, paths, dirs, failures)."""
        fails = "\n".join(["{}: {}".format(cd, e) \
                           for cd, e in self.failed.iteritems()])
        calcdirs = ", ".join(self._calcdirs)
        outstr = """
---------------------------------- GC details ---------------------------------
# Calculated with: Qtools ({version}), Qcalc ({qcalc_version})
# Qcalc path: {qcalc_exec}
# Work dir: {cwd}
# Date: {date}
# CMDline: {cmdline}

Directories:
{dirs}

Fails:
{fails}
-------------------------------------------------------------------------------
""".format(version=__version__,
           cwd=os.getcwd(),
           date=time.ctime(),
           cmdline=" ".join(sys.argv),
           qcalc_version=self.qcalc_version,
           fails=fails or "None",
           dirs=calcdirs,
           qcalc_exec=os.path.abspath(self._qcalc_exec))
        return outstr

    @property
    def plotdata(self):
        """Return GC data as a dictionary of PlotData objects.

        Example keys in returned dictionary:
        'gc_lra_el': PlotData of electrostatic LRA group contributions,
                     one subplot - means vs residue index
        'gc_lra_el_top': PlotData of top 20 electrostatic LRA GCs
                         one subplot - means vs "resid.resname"
        'gc_lra_vdw': PlotData of vdw LRA GCs,
                      one subplot - means vs residue indexes
        'gc_reorg_el': PlotData of el. 'REORG' group contributions,
                       one subplot - means vs residue index
        'gc_de1_el': PlotData of electrostatic <E1 - E2>_1,
                     one subplot - means vs residue index
        'gc_de2_el': PlotData of electrostatic <E1 - E2>_2,
                     one subplot - means vs residue index
        """
        plots = ODict()
        # all failed
        if not self.gcs:
            return plots

        lamb1, lamb2 = self._lambdas_A[0], self._lambdas_B[0]
        # make PlotData objects
        plots["gc_lra_el_top"] = PlotData("Top LRA GC (El, {}->{}, iscale={}),"
                                          " top 20".format(lamb1, lamb2,
                                                           self._scale_ionized),
                                          xlabel="Residue",
                                          ylabel="Free energy [kcal/mol]",
                                          plot_type="bar")
        plots["gc_reorg_el_top"] = PlotData("Top REORG GC (El, {}->{}, iscale={}),"
                                            " top 20".format(lamb1, lamb2,
                                                             self._scale_ionized),
                                            xlabel="Residue",
                                            ylabel="Free energy [kcal/mol]",
                                            plot_type="bar")
        plots["gc_lra_el"] = PlotData("LRA GC (El, {}->{}, iscale={})"
                                      "".format(lamb1, lamb2,
                                                self._scale_ionized),
                                      xlabel="Residue index",
                                      ylabel="Energy [kcal/mol]",
                                      plot_type="bar")
        plots["gc_lra_vdw"] = PlotData("LRA GC (VdW, {}->{})"
                                       "".format(lamb1, lamb2),
                                       xlabel="Residue index",
                                       ylabel="Energy [kcal/mol]",
                                       plot_type="bar")
        plots["gc_reorg_el"] = PlotData("REORG GC (El, {}->{}, iscale={})"
                                        "".format(lamb1, lamb2,
                                                  self._scale_ionized),
                                        xlabel="Residue index",
                                        ylabel="Energy [kcal/mol]",
                                        plot_type="bar")
        plots["gc_reorg_vdw"] = PlotData("REORG GC (VdW, {}->{})"
                                         "".format(lamb1, lamb2),
                                         xlabel="Residue index",
                                         ylabel="Energy [kcal/mol]",
                                         plot_type="bar")
        plots["gc_de1_el"] = PlotData("<E1-E2>_1 (El, {}->{})"
                                      "".format(lamb1, lamb2),
                                      xlabel="Residue index",
                                      ylabel="Energy [kcal/mol]",
                                      plot_type="bar")
        plots["gc_de1_vdw"] = PlotData("<E1-E2>_1 (VdW, {}->{})"
                                       "".format(lamb1, lamb2),
                                       xlabel="Residue index",
                                       ylabel="Energy [kcal/mol]",
                                       plot_type="bar")
        plots["gc_de2_el"] = PlotData("<E1-E2>_2 (El, {}->{})"
                                      "".format(lamb1, lamb2),
                                      xlabel="Residue index",
                                      ylabel="Energy [kcal/mol]",
                                      plot_type="bar")
        plots["gc_de2_vdw"] = PlotData("<E1-E2>_2 (VdW, {}->{})"
                                       "".format(lamb1, lamb2),
                                       xlabel="Residue index",
                                       ylabel="Energy [kcal/mol]",
                                       plot_type="bar")

        # column indices follow the colnames layout set up in __init__
        cols = self.gcs_stats.get_columns()
        resids = cols[0]
        title = "mean_N={}".format(len(self.gcs))
        plots["gc_de1_vdw"].add_subplot(title, resids, cols[3], yerror=cols[4])
        plots["gc_de1_el"].add_subplot(title, resids, cols[5], yerror=cols[6])
        plots["gc_de2_vdw"].add_subplot(title, resids, cols[7], yerror=cols[8])
        plots["gc_de2_el"].add_subplot(title, resids, cols[9], yerror=cols[10])
        plots["gc_lra_vdw"].add_subplot(title, resids, cols[11],
                                        yerror=cols[12])
        plots["gc_lra_el"].add_subplot(title, resids, cols[13],
                                       yerror=cols[14])
        plots["gc_reorg_vdw"].add_subplot(title, resids, cols[15],
                                          yerror=cols[16])
        plots["gc_reorg_el"].add_subplot(title, resids, cols[17],
                                         yerror=cols[18])

        # top 20 LRA el
        # NOTE(review): sort key is abs(x[5]) (<E2-E1>1_El mean) while the
        # plotted values are cols 13/14 (LRA_El) -- possibly intentional,
        # but verify against upstream; same for x[9] vs 17/18 below.
        sorted_rows = sorted(self.gcs_stats.get_rows(),
                             key=lambda x: -abs(x[5]))[:20]
        cols = zip(*sorted_rows)
        resids, resnames = cols[0], cols[1]
        keys = ["{}_{}".format(rn.capitalize(), ri) \
                for ri, rn in zip(resids, resnames)]
        els, elstd = cols[13], cols[14]
        plots["gc_lra_el_top"].add_subplot(title, keys, els, yerror=elstd)

        # top 20 reorg el
        sorted_rows = sorted(self.gcs_stats.get_rows(),
                             key=lambda x: -abs(x[9]))[:20]
        cols = zip(*sorted_rows)
        resids, resnames = cols[0], cols[1]
        keys = ["{}_{}".format(rn.capitalize(), ri) \
                for ri, rn in zip(resids, resnames)]
        els, elstd = cols[17], cols[18]
        plots["gc_reorg_el_top"].add_subplot(title, keys, els, yerror=elstd)

        return plots

    def get_pdbgc(self):
        """Return the structure in PDB format (string) with added GC values.

        Fill the Occupancy fields with LRA contributions and
        Temperature factor fields with REORG contributions.
        """
        try:
            # columns 0, 13, 17: resid, LRA_El mean, REORG_El mean
            resids, lras, reorgs = self.gcs_stats.get_columns([0, 13, 17])
        except IndexError:
            # no stats computed (e.g. all runs failed)
            resids, lras, reorgs = [], [], []
        pdb = []
        for mol in self._pdb_qstruct.molecules:
            for res in mol.residues:
                try:
                    i = resids.index(res.index)
                    lra_gc, reorg_gc = lras[i], reorgs[i]
                except ValueError:
                    # residue outside the calculated range -> zeros
                    lra_gc, reorg_gc = 0, 0
                for atom in res.atoms:
                    x, y, z = atom.coordinates
                    pdb.append("ATOM {:>5d} {:<4s} {:3s} {:>4d} "\
                               "{:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}"\
                               "".format(atom.index, atom.name,
                                         atom.residue.name,
                                         atom.residue.index,
                                         x, y, z, lra_gc, reorg_gc))
            # molecule separator
            pdb.append("GAP")
        return "\n".join(pdb)
class _QFepPart3(object):
    """Class for parsing and storing data from Part3 in Qfep output.

    Part3 contains the bin-averaged dGg values, points and
    squared eigenvectors from Part2.
    If parsing is unsuccessful QFepOutputError is raised,
    else all the data is stored in DataContainer object 'data'.

    Args:
        part3_string (string): string of Part3 in qfep output

    Usage:
    >>> cols = ["Lambda", "dGg"]
    >>> dGg_lambda = _QFepPart3.data.get_rows(columns=cols)
    """

    # expected column-header comment line (sanity check of qfep version)
    _PART3_HEADER = "# bin energy gap <dGg> <dGg norm> pts <c1**2> "\
                    "<c2**2> <r_xy>"
    _COLUMN_TITLES = ["bin", "Egap", "dGg", "dGg_norm", "points",
                      "c1**2", "c2**2", "r_xy"]

    def __init__(self, part3_string):
        self._part3_string = part3_string
        self.data = DataContainer(self._COLUMN_TITLES)
        # results are computed lazily by _get_dgs() on first property access
        self._dga = None
        self._dg0 = None
        self._maxima_bins = None
        self._minima_bins = None
        self.warning = None
        self._parse()
        if not self.data.get_rows():
            raise QFepOutputError("Part3 is empty (no rows).")

    def _parse(self):
        """Populate self.data from the raw Part3 text.

        Raises:
            QFepOutputError: if the header does not match _PART3_HEADER
        """
        lines = self._part3_string.split('\n')
        # the first line is a comment
        lines.pop(0)
        # comment with column names
        header = lines.pop(0).strip()
        if header != self._PART3_HEADER:
            raise QFepOutputError("Part3 has a wrong header, did the qfep "
                                  "binary change?")
        for line in lines:
            # strip trailing '#'/'!' comments
            # (raw string: '\!' is an invalid escape sequence in Python 3)
            line = re.split(r"#|!", line)[0].strip()
            if not line:
                continue
            row = [float(x) for x in line.split()]
            self.data.add_row(row)

    @property
    def dga(self):
        """Activation free energy (computed lazily)."""
        # PEP 8: identity comparison with None
        if self._dga is None:
            self._get_dgs()
        return self._dga

    @property
    def dg0(self):
        """Reaction free energy (computed lazily)."""
        if self._dg0 is None:
            self._get_dgs()
        return self._dg0

    @property
    def minima_bins(self):
        """Bin values of the two free-energy minima (computed lazily)."""
        if self._minima_bins is None:
            self._get_dgs()
        return self._minima_bins

    @property
    def maxima_bins(self):
        """Bin value(s) of the free-energy maximum (computed lazily)."""
        if self._maxima_bins is None:
            self._get_dgs()
        return self._maxima_bins

    def _get_dgs(self):
        # Get minima and maxima without any smoothing.
        # If there is more than one maxima and less or more than 2 minima,
        # raise an exception search for maxima only between 0.2*nbins and
        # 0.8*nbins (bad sampling on the edges can raise an error)
        # Also, save the bins of the minima.
        bins, des, dgs = self.data.get_columns(["bin", "Egap", "dGg_norm"])
        minima, maxima = [], []
        nbins = len(bins)
        for i in range(1, nbins-1):
            # from the second to the second last
            dg, dgnext, dgprev = dgs[i], dgs[i+1], dgs[i-1]
            if dgprev >= dg and dg < dgnext:
                minima.append(i)
            elif dgprev <= dg and dg > dgnext and \
                    i > nbins*0.2 and i < nbins*0.8:
                maxima.append(i)

        if len(minima) > 2 or len(maxima) > 1:
            # Bad sampling, more minima and maxima than wanted.
            # Get the highest maxima from those found so far.
            # Get the absolute minima to the left and to the right of this
            # maxima. Save the warning.
            if not maxima:
                # >2 minima but no interior maximum at all: max() below
                # would raise a bare ValueError, raise the documented
                # exception instead
                raise QFepOutputError("Bad reaction free energy profile - "
                                      "no local maxima found")
            max1 = max(maxima, key=lambda i: dgs[i])
            react = [(dgs[i], i) for i in minima if i < max1]
            prod = [(dgs[i], i) for i in minima if i > max1]
            try:
                min1 = min(react)[1]  # min() will return tuple with lowest dg
                min2 = min(prod)[1]
            except ValueError:
                # multiple minima on one side, none on the other
                # (starts/ends at the lowest point)
                raise QFepOutputError("Bad reaction free energy profile - "
                                      "reactants minima: {}, products minima: "
                                      "{}".format(len(react), len(prod)))
            self.warning = "Rough Free energy profile ({} minima and {} "\
                           "maxima found), look at the graphs!"\
                           "".format(len(minima), len(maxima))
            maxima = [max1, ]
            minima = [min1, min2]

        if len(minima) != 2:
            raise QFepOutputError("Bad reaction free energy profile - {} "
                                  "local minima (instead of 2)"
                                  "".format(len(minima)))
        elif len(maxima) != 1:
            raise QFepOutputError("Bad reaction free energy profile - {} "
                                  "local maxima (instead of 1)"
                                  "".format(len(maxima)))

        # dG# = TS - reactants, dG0 = products - reactants
        self._dga = dgs[maxima[0]] - dgs[minima[0]]
        self._dg0 = dgs[minima[1]] - dgs[minima[0]]
        self._minima_bins = [bins[mini] for mini in minima]
        self._maxima_bins = [bins[maxi] for maxi in maxima]

        # adjust the values in data so that the reactants are zero
        colindex = self.data.column_titles.index("dGg_norm")
        for row in self.data.get_rows():
            row[colindex] = row[colindex] - dgs[minima[0]]