Ejemplo n.º 1
0
    def generate_table(self):
        """Generate a normalised table of the results.

        The table is stored as a :class:`numpy.recarray` in the
        attribute :attr:`~HydrogenBondAnalysis.table` and can be used
        with e.g. `recsql`_. One row is written for every hydrogen bond
        entry of every frame in :attr:`timeseries`.

        Columns:
          0. "time"
          1. "donor_idx"
          2. "acceptor_idx"
          3. "donor_resnm"
          4. "donor_resid"
          5. "donor_atom"
          6. "acceptor_resnm"
          7. "acceptor_resid"
          8. "acceptor_atom"
          9. "distance"
          10. "angle"

        If :attr:`timeseries` is ``None`` a :class:`MissingDataWarning`
        is issued (and logged) and the method returns ``None`` without
        creating the table.

        .. _recsql: http://pypi.python.org/pypi/RecSQL
        """
        if self.timeseries is None:
            msg = "No timeseries computed, do run() first."
            warnings.warn(msg, category=MissingDataWarning)
            logger.warning(msg)
            return

        # The built-in sum() keeps the count an int even when the timeseries
        # is empty; numpy.sum([]) returns the float 0.0, which is not a valid
        # array dimension.
        num_records = sum(len(hframe) for hframe in self.timeseries)
        dtype = [("time", float), ("donor_idx", int), ("acceptor_idx", int),
                 ("donor_resnm", "|S4"), ("donor_resid", int),
                 ("donor_atom", "|S4"), ("acceptor_resnm", "|S4"),
                 ("acceptor_resid", int), ("acceptor_atom", "|S4"),
                 ("distance", float), ("angle", float)]
        self.table = numpy.recarray((num_records, ), dtype=dtype)

        # NOTE: filling a recarray record by record is slow; filling a plain
        # structured ndarray and viewing it as a recarray afterwards could be
        # roughly 10x faster, at some cost in clarity (original note by
        # orbeckst).
        cursor = 0  # index of the next row to fill
        # Built-in zip() replaces itertools.izip, which only exists on
        # Python 2; zip() works on both Python 2 and 3.
        for t, hframe in zip(self.timesteps, self.timeseries):
            if len(hframe) == 0:
                continue  # nothing to store for this frame
            # all rows belonging to this frame share the same time stamp
            self.table[cursor:cursor + len(hframe)].time = t
            for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
                r = self.table[cursor]
                r.donor_idx = donor_idx
                r.donor_resnm, r.donor_resid, r.donor_atom = parse_residue(
                    donor)
                r.acceptor_idx = acceptor_idx
                r.acceptor_resnm, r.acceptor_resid, r.acceptor_atom = parse_residue(
                    acceptor)
                r.distance = distance
                r.angle = angle
                cursor += 1
        # internal consistency check: every pre-allocated row must be filled
        assert cursor == num_records, "Internal Error: Not all HB records stored"
        logger.debug(
            "HBond: Stored results as table with %d entries.", num_records)
Ejemplo n.º 2
0
    def timesteps_by_type(self):
        """Frames during which each hydrogen bond existed, grouped by hydrogen bond.

        Processes :attr:`HydrogenBondAnalysis.timeseries` and returns
        a :class:`numpy.recarray` containing atom indices, residue
        names, residue numbers (for donors and acceptors) and the time
        at which the hydrogen bond was detected. The table holds one
        row per (hydrogen bond, timestep) pair; rows belonging to the
        same hydrogen bond are stored contiguously.

        If :attr:`timeseries` is ``None`` a :class:`MissingDataWarning`
        is issued (and logged) and ``None`` is returned.

        :Returns: a :class:`numpy.recarray`
        """
        if self.timeseries is None:
            msg = "No timeseries computed, do run() first."
            warnings.warn(msg, category=MissingDataWarning)
            logger.warning(msg)
            return

        # Map each unique hydrogen bond to the list of times it was seen.
        # Built-in zip() replaces itertools.izip, which only exists on
        # Python 2; zip() works on both Python 2 and 3.
        hbonds = defaultdict(list)
        for t, hframe in zip(self.timesteps, self.timeseries):
            for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
                donor_resnm, donor_resid, donor_atom = parse_residue(donor)
                acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(
                    acceptor)
                # generate unambiguous key for current hbond; the "?" is a
                # placeholder for donor_heavy_atom, which is patched in at
                # the end of this method
                hb_key = (donor_idx, acceptor_idx, donor_resnm, donor_resid,
                          "?", donor_atom, acceptor_resnm, acceptor_resid,
                          acceptor_atom)
                hbonds[hb_key].append(t)

        # total number of (hbond, timestep) pairs = length of output table
        out_nrows = sum(len(ts_list) for ts_list in hbonds.values())

        # build empty output table; field order must match hb_key + (time,)
        dtype = [('donor_idx', int), ('acceptor_idx', int),
                 ('donor_resnm', 'S4'), ('donor_resid', int),
                 ('donor_heavy_atom', 'S4'), ('donor_atom', 'S4'),
                 ('acceptor_resnm', 'S4'), ('acceptor_resid', int),
                 ('acceptor_atom', 'S4'), ('time', float)]
        out = numpy.empty((out_nrows, ), dtype=dtype)

        out_row = 0
        for key, times in hbonds.items():
            for tstep in times:
                out[out_row] = key + (tstep, )
                out_row += 1

        # return array as recarray
        # The recarray has not been used within the function, because accessing
        # the elements of a recarray (3.65 us) is much slower than accessing
        # those of a ndarray (287 ns).
        r = out.view(numpy.recarray)

        # patch in donor heavy atom names (replaces '?' in the key)
        # NOTE(review): the "- 1" suggests donor_idx is 1-based while the
        # lookup table is indexed from 0 -- confirm against
        # _donor_lookup_table_byindex().
        h2donor = self._donor_lookup_table_byindex()
        r.donor_heavy_atom[:] = [h2donor[idx - 1] for idx in r.donor_idx]

        return r
Ejemplo n.º 3
0
    def timesteps_by_type(self):
        """Frames during which each hydrogen bond existed, grouped by hydrogen bond.

        Processes :attr:`HydrogenBondAnalysis.timeseries` and returns
        a :class:`numpy.recarray` containing atom indices, residue
        names, residue numbers (for donors and acceptors) and the time
        at which the hydrogen bond was detected. The table holds one
        row per (hydrogen bond, timestep) pair; rows belonging to the
        same hydrogen bond are stored contiguously.

        If :attr:`timeseries` is ``None`` a :class:`MissingDataWarning`
        is issued (and logged) and ``None`` is returned.

        :Returns: a :class:`numpy.recarray`
        """
        if self.timeseries is None:
            msg = "No timeseries computed, do run() first."
            warnings.warn(msg, category=MissingDataWarning)
            logger.warning(msg)
            return

        # Collect, for every unique hydrogen bond, the times at which it was
        # observed. Built-in zip() is used instead of itertools.izip because
        # izip does not exist on Python 3.
        hbonds = defaultdict(list)
        for t, hframe in zip(self.timesteps, self.timeseries):
            for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
                donor_resnm, donor_resid, donor_atom = parse_residue(donor)
                acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
                # generate unambiguous key for current hbond; the "?" is a
                # placeholder for donor_heavy_atom, which is patched in at
                # the end of this method
                hb_key = (donor_idx, acceptor_idx,
                          donor_resnm, donor_resid, "?", donor_atom,
                          acceptor_resnm, acceptor_resid, acceptor_atom)
                hbonds[hb_key].append(t)

        # number of timesteps over all keys gives the length of output table
        out_nrows = sum(len(ts_list) for ts_list in hbonds.values())

        # build empty output table; field order must match hb_key + (time,)
        dtype = [('donor_idx', int), ('acceptor_idx', int),
                 ('donor_resnm', 'S4'), ('donor_resid', int),
                 ('donor_heavy_atom', 'S4'), ('donor_atom', 'S4'),
                 ('acceptor_resnm', 'S4'), ('acceptor_resid', int),
                 ('acceptor_atom', 'S4'),
                 ('time', float)]
        out = numpy.empty((out_nrows,), dtype=dtype)

        out_row = 0
        for key, times in hbonds.items():
            for tstep in times:
                out[out_row] = key + (tstep,)
                out_row += 1

        # return array as recarray
        # The recarray has not been used within the function, because accessing
        # the elements of a recarray (3.65 us) is much slower than accessing
        # those of a ndarray (287 ns).
        r = out.view(numpy.recarray)

        # patch in donor heavy atom names (replaces '?' in the key)
        # NOTE(review): the "- 1" suggests donor_idx is 1-based while the
        # lookup table is indexed from 0 -- confirm against
        # _donor_lookup_table_byindex().
        h2donor = self._donor_lookup_table_byindex()
        r.donor_heavy_atom[:] = [h2donor[idx - 1] for idx in r.donor_idx]

        return r
Ejemplo n.º 4
0
    def generate_table(self):
        """Generate a normalised table of the results.

        The table is stored as a :class:`numpy.recarray` in the
        attribute :attr:`~HydrogenBondAnalysis.table` and can be used
        with e.g. `recsql`_. One row is written for every hydrogen bond
        entry of every frame in :attr:`timeseries`.

        Columns:
          0. "time"
          1. "donor_idx"
          2. "acceptor_idx"
          3. "donor_resnm"
          4. "donor_resid"
          5. "donor_atom"
          6. "acceptor_resnm"
          7. "acceptor_resid"
          8. "acceptor_atom"
          9. "distance"
          10. "angle"

        If :attr:`timeseries` is ``None`` a :class:`MissingDataWarning`
        is issued (and logged) and the method returns ``None`` without
        creating the table.

        .. _recsql: http://pypi.python.org/pypi/RecSQL
        """
        if self.timeseries is None:
            msg = "No timeseries computed, do run() first."
            warnings.warn(msg, category=MissingDataWarning)
            logger.warning(msg)
            return

        # Count rows with the built-in sum(): numpy.sum([]) would return the
        # float 0.0 for an empty timeseries, which cannot be used as an array
        # dimension.
        num_records = sum(len(hframe) for hframe in self.timeseries)
        dtype = [
            ("time", float), ("donor_idx", int), ("acceptor_idx", int),
            ("donor_resnm", "|S4"), ("donor_resid", int), ("donor_atom", "|S4"),
            ("acceptor_resnm", "|S4"), ("acceptor_resid", int), ("acceptor_atom", "|S4"),
            ("distance", float), ("angle", float)]
        self.table = numpy.recarray((num_records,), dtype=dtype)

        # NOTE: filling a recarray record by record is slow; filling a plain
        # structured ndarray and viewing it as a recarray afterwards could be
        # roughly 10x faster, at some cost in clarity (original note by
        # orbeckst).
        cursor = 0  # index of the next row to fill
        # itertools.izip is Python 2 only; the built-in zip() behaves the
        # same here and is available on both Python 2 and 3.
        for t, hframe in zip(self.timesteps, self.timeseries):
            if len(hframe) == 0:
                continue  # nothing to store for this frame
            # all rows belonging to this frame share the same time stamp
            self.table[cursor:cursor + len(hframe)].time = t
            for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
                r = self.table[cursor]
                r.donor_idx = donor_idx
                r.donor_resnm, r.donor_resid, r.donor_atom = parse_residue(donor)
                r.acceptor_idx = acceptor_idx
                r.acceptor_resnm, r.acceptor_resid, r.acceptor_atom = parse_residue(acceptor)
                r.distance = distance
                r.angle = angle
                cursor += 1
        # internal consistency check: every pre-allocated row must be filled
        assert cursor == num_records, "Internal Error: Not all HB records stored"
        logger.debug("HBond: Stored results as table with %d entries.", num_records)
Ejemplo n.º 5
0
    def count_by_type(self):
        """Counts the frequency of hydrogen bonds of a specific type.

        Processes :attr:`HydrogenBondAnalysis.timeseries` and returns
        a :class:`numpy.recarray` containing atom indices, residue
        names, residue numbers (for donors and acceptors) and the
        fraction of the total time during which the hydrogen bond was
        detected. The fraction is the number of occurrences of the bond
        in the timeseries divided by ``len(self.timesteps)``.

        If :attr:`timeseries` is ``None`` a :class:`MissingDataWarning`
        is issued (and logged) and ``None`` is returned.

        :Returns: a :class:`numpy.recarray`
        """
        if self.timeseries is None:
            msg = "No timeseries computed, do run() first."
            warnings.warn(msg, category=MissingDataWarning)
            logger.warning(msg)
            return

        # number of occurrences of every unique hydrogen bond
        hbonds = defaultdict(int)
        for hframe in self.timeseries:
            for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
                donor_resnm, donor_resid, donor_atom = parse_residue(donor)
                acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
                # generate unambiguous key for current hbond; the "?" is a
                # placeholder for donor_heavy_atom, which is patched in at
                # the end of this method
                hb_key = (
                    donor_idx, acceptor_idx,
                    donor_resnm, donor_resid, "?", donor_atom,
                    acceptor_resnm, acceptor_resid, acceptor_atom)

                hbonds[hb_key] += 1

        # build empty output table; field order must match hb_key + (frequency,)
        dtype = [
            ('donor_idx', int), ('acceptor_idx', int),
            ('donor_resnm', 'S4'), ('donor_resid', int), ('donor_heavy_atom', 'S4'), ('donor_atom', 'S4'),
            ('acceptor_resnm', 'S4'), ('acceptor_resid', int), ('acceptor_atom', 'S4'),
            ('frequency', float)
        ]
        out = numpy.empty((len(hbonds),), dtype=dtype)

        # float so that the division below is a true division on Python 2 too
        tsteps = float(len(self.timesteps))
        # dict.items() replaces the Python 2-only iteritems() and works on
        # both Python 2 and 3
        for cursor, (key, count) in enumerate(hbonds.items()):
            out[cursor] = key + (count / tsteps,)

        # return array as recarray
        # The recarray has not been used within the function, because accessing
        # the elements of a recarray (3.65 us) is much slower than accessing
        # those of a ndarray (287 ns).
        r = out.view(numpy.recarray)

        # patch in donor heavy atom names (replaces '?' in the key)
        # NOTE(review): the "- 1" suggests donor_idx is 1-based while the
        # lookup table is indexed from 0 -- confirm against
        # _donor_lookup_table_byindex().
        h2donor = self._donor_lookup_table_byindex()
        r.donor_heavy_atom[:] = [h2donor[idx - 1] for idx in r.donor_idx]

        return r
Ejemplo n.º 6
0
def check_parse_residue(rstring, residue):
    """Assert that ``util.parse_residue(rstring)`` equals *residue*.

    *rstring* is the residue string handed to the parser and *residue*
    is the expected parse result.
    """
    parsed = util.parse_residue(rstring)
    assert_equal(parsed, residue)