def generate_table(self):
    """Generate a normalised table of the results.

    The table is stored as a :class:`numpy.recarray` in the
    attribute :attr:`~HydrogenBondAnalysis.table` and can be used
    with e.g. `recsql`_. One record is written per hydrogen bond
    per frame.

    Columns:
      0. "time"
      1. "donor_idx"
      2. "acceptor_idx"
      3. "donor_resnm"
      4. "donor_resid"
      5. "donor_atom"
      6. "acceptor_resnm"
      7. "acceptor_resid"
      8. "acceptor_atom"
      9. "distance"
      10. "angle"

    If :attr:`timeseries` is ``None`` a :exc:`MissingDataWarning` is
    issued (and logged) and the method returns without storing a table.

    .. _recsql: http://pypi.python.org/pypi/RecSQL
    """
    if self.timeseries is None:
        msg = "No timeseries computed, do run() first."
        warnings.warn(msg, category=MissingDataWarning)
        logger.warning(msg)
        return

    # Built-in sum() keeps the record count an int even when there are no
    # frames; numpy.sum([]) yields the float 0.0, which is not a valid shape.
    num_records = sum(len(hframe) for hframe in self.timeseries)
    dtype = [
        ("time", float),
        ("donor_idx", int),
        ("acceptor_idx", int),
        ("donor_resnm", "|S4"),
        ("donor_resid", int),
        ("donor_atom", "|S4"),
        ("acceptor_resnm", "|S4"),
        ("acceptor_resid", int),
        ("acceptor_atom", "|S4"),
        ("distance", float),
        ("angle", float),
    ]
    self.table = numpy.recarray((num_records,), dtype=dtype)

    # according to Lukas' notes below, using a recarray at this stage is ineffective
    # and speedups of ~x10 could be achieved by filling a standard array
    # (perhaps at the cost of less clarity... but that might just be my code ;-) -- orbeckst)
    cursor = 0  # current row
    for t, hframe in zip(self.timesteps, self.timeseries):
        if len(hframe) == 0:
            continue  # not really necessary, should also work without
        # stamp the frame time on every row this frame is about to fill
        self.table[cursor:cursor + len(hframe)].time = t
        for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
            r = self.table[cursor]
            r.donor_idx = donor_idx
            r.donor_resnm, r.donor_resid, r.donor_atom = parse_residue(donor)
            r.acceptor_idx = acceptor_idx
            r.acceptor_resnm, r.acceptor_resid, r.acceptor_atom = parse_residue(acceptor)
            r.distance = distance
            r.angle = angle
            cursor += 1
    assert cursor == num_records, "Internal Error: Not all HB records stored"
    # vars() supplies the local ``num_records`` for the %(num_records)d placeholder
    logger.debug("HBond: Stored results as table with %(num_records)d entries.", vars())
def timesteps_by_type(self):
    """Frames during which each hydrogen bond existed, grouped by hydrogen bond.

    Processes :attr:`HydrogenBondAnalysis.timeseries` and returns
    a :class:`numpy.recarray` containing atom indices, residue
    names, residue numbers (for donors and acceptors) and the
    timesteps at which the hydrogen bond was detected. The output
    holds one row per hydrogen bond per timestep; rows belonging to
    the same hydrogen bond are contiguous.

    If :attr:`timeseries` is ``None`` a :exc:`MissingDataWarning` is
    issued (and logged) and ``None`` is returned.

    :Returns: a :class:`numpy.recarray`
    """
    if self.timeseries is None:
        msg = "No timeseries computed, do run() first."
        warnings.warn(msg, category=MissingDataWarning)
        logger.warning(msg)
        return

    # map each distinct hydrogen bond to the list of times at which it was seen
    hbonds = defaultdict(list)
    for t, hframe in zip(self.timesteps, self.timeseries):
        for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
            donor_resnm, donor_resid, donor_atom = parse_residue(donor)
            acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
            # generate unambiguous key for current hbond
            # (the donor_heavy_atom placeholder '?' is replaced later)
            hb_key = (
                donor_idx, acceptor_idx,
                donor_resnm, donor_resid, "?", donor_atom,
                acceptor_resnm, acceptor_resid, acceptor_atom)
            hbonds[hb_key].append(t)

    # total number of (hbond, timestep) pairs gives the length of the output table
    out_nrows = sum(len(ts_list) for ts_list in hbonds.values())

    # build empty output table
    dtype = [
        ('donor_idx', int),
        ('acceptor_idx', int),
        ('donor_resnm', 'S4'),
        ('donor_resid', int),
        ('donor_heavy_atom', 'S4'),
        ('donor_atom', 'S4'),
        ('acceptor_resnm', 'S4'),
        ('acceptor_resid', int),
        ('acceptor_atom', 'S4'),
        ('time', float),
    ]
    out = numpy.empty((out_nrows,), dtype=dtype)

    out_row = 0
    for key, times in hbonds.items():
        for tstep in times:
            out[out_row] = key + (tstep,)
            out_row += 1

    # return array as recarray
    # The recarray has not been used within the function, because accessing
    # the elements of a recarray (3.65 us) is much slower than accessing those
    # of a ndarray (287 ns).
    r = out.view(numpy.recarray)

    # patch in donor heavy atom names (replaces '?' in the key)
    # NOTE(review): the ``idx - 1`` presumably converts a 1-based donor_idx to
    # the 0-based index used by the lookup table -- confirm.
    h2donor = self._donor_lookup_table_byindex()
    r.donor_heavy_atom[:] = [h2donor[idx - 1] for idx in r.donor_idx]
    return r
def timesteps_by_type(self):
    """Frames during which each hydrogen bond existed, grouped by hydrogen bond.

    Processes :attr:`HydrogenBondAnalysis.timeseries` and returns
    a :class:`numpy.recarray` containing atom indices, residue
    names, residue numbers (for donors and acceptors) and the
    timesteps at which the hydrogen bond was detected. The output
    holds one row per hydrogen bond per timestep; rows belonging to
    the same hydrogen bond are contiguous.

    If :attr:`timeseries` is ``None`` a :exc:`MissingDataWarning` is
    issued (and logged) and ``None`` is returned.

    :Returns: a :class:`numpy.recarray`
    """
    if self.timeseries is None:
        msg = "No timeseries computed, do run() first."
        warnings.warn(msg, category=MissingDataWarning)
        logger.warning(msg)
        return

    # map each distinct hydrogen bond to the list of times at which it was seen
    hbonds = defaultdict(list)
    for t, hframe in zip(self.timesteps, self.timeseries):
        for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
            donor_resnm, donor_resid, donor_atom = parse_residue(donor)
            acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
            # generate unambiguous key for current hbond
            # (the donor_heavy_atom placeholder '?' is replaced later)
            hb_key = (
                donor_idx, acceptor_idx,
                donor_resnm, donor_resid, "?", donor_atom,
                acceptor_resnm, acceptor_resid, acceptor_atom)
            hbonds[hb_key].append(t)

    # total number of (hbond, timestep) pairs gives the length of the output table
    out_nrows = sum(len(ts_list) for ts_list in hbonds.values())

    # build empty output table
    dtype = [
        ('donor_idx', int),
        ('acceptor_idx', int),
        ('donor_resnm', 'S4'),
        ('donor_resid', int),
        ('donor_heavy_atom', 'S4'),
        ('donor_atom', 'S4'),
        ('acceptor_resnm', 'S4'),
        ('acceptor_resid', int),
        ('acceptor_atom', 'S4'),
        ('time', float),
    ]
    out = numpy.empty((out_nrows,), dtype=dtype)

    out_row = 0
    for key, times in hbonds.items():
        for tstep in times:
            out[out_row] = key + (tstep,)
            out_row += 1

    # return array as recarray
    # The recarray has not been used within the function, because accessing
    # the elements of a recarray (3.65 us) is much slower than accessing those
    # of a ndarray (287 ns).
    r = out.view(numpy.recarray)

    # patch in donor heavy atom names (replaces '?' in the key)
    # NOTE(review): the ``idx - 1`` presumably converts a 1-based donor_idx to
    # the 0-based index used by the lookup table -- confirm.
    h2donor = self._donor_lookup_table_byindex()
    r.donor_heavy_atom[:] = [h2donor[idx - 1] for idx in r.donor_idx]
    return r
def generate_table(self):
    """Generate a normalised table of the results.

    The table is stored as a :class:`numpy.recarray` in the
    attribute :attr:`~HydrogenBondAnalysis.table` and can be used
    with e.g. `recsql`_. One record is written per hydrogen bond
    per frame.

    Columns:
      0. "time"
      1. "donor_idx"
      2. "acceptor_idx"
      3. "donor_resnm"
      4. "donor_resid"
      5. "donor_atom"
      6. "acceptor_resnm"
      7. "acceptor_resid"
      8. "acceptor_atom"
      9. "distance"
      10. "angle"

    If :attr:`timeseries` is ``None`` a :exc:`MissingDataWarning` is
    issued (and logged) and the method returns without storing a table.

    .. _recsql: http://pypi.python.org/pypi/RecSQL
    """
    if self.timeseries is None:
        msg = "No timeseries computed, do run() first."
        warnings.warn(msg, category=MissingDataWarning)
        logger.warning(msg)
        return

    # Built-in sum() keeps the record count an int even when there are no
    # frames; numpy.sum([]) yields the float 0.0, which is not a valid shape.
    num_records = sum(len(hframe) for hframe in self.timeseries)
    dtype = [
        ("time", float),
        ("donor_idx", int),
        ("acceptor_idx", int),
        ("donor_resnm", "|S4"),
        ("donor_resid", int),
        ("donor_atom", "|S4"),
        ("acceptor_resnm", "|S4"),
        ("acceptor_resid", int),
        ("acceptor_atom", "|S4"),
        ("distance", float),
        ("angle", float),
    ]
    self.table = numpy.recarray((num_records,), dtype=dtype)

    # according to Lukas' notes below, using a recarray at this stage is ineffective
    # and speedups of ~x10 could be achieved by filling a standard array
    # (perhaps at the cost of less clarity... but that might just be my code ;-) -- orbeckst)
    cursor = 0  # current row
    for t, hframe in zip(self.timesteps, self.timeseries):
        if len(hframe) == 0:
            continue  # not really necessary, should also work without
        # stamp the frame time on every row this frame is about to fill
        self.table[cursor:cursor + len(hframe)].time = t
        for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
            r = self.table[cursor]
            r.donor_idx = donor_idx
            r.donor_resnm, r.donor_resid, r.donor_atom = parse_residue(donor)
            r.acceptor_idx = acceptor_idx
            r.acceptor_resnm, r.acceptor_resid, r.acceptor_atom = parse_residue(acceptor)
            r.distance = distance
            r.angle = angle
            cursor += 1
    assert cursor == num_records, "Internal Error: Not all HB records stored"
    # vars() supplies the local ``num_records`` for the %(num_records)d placeholder
    logger.debug("HBond: Stored results as table with %(num_records)d entries.", vars())
def count_by_type(self):
    """Counts the frequency of hydrogen bonds of a specific type.

    Processes :attr:`HydrogenBondAnalysis.timeseries` and returns
    a :class:`numpy.recarray` containing atom indices, residue
    names, residue numbers (for donors and acceptors) and the
    fraction of the total time during which the hydrogen bond was
    detected. The fraction is the number of frames in which the bond
    appears divided by ``len(self.timesteps)``.

    If :attr:`timeseries` is ``None`` a :exc:`MissingDataWarning` is
    issued (and logged) and ``None`` is returned.

    :Returns: a :class:`numpy.recarray`
    """
    if self.timeseries is None:
        msg = "No timeseries computed, do run() first."
        warnings.warn(msg, category=MissingDataWarning)
        logger.warning(msg)
        return

    # number of occurrences of each distinct hydrogen bond over all frames
    hbonds = defaultdict(int)
    for hframe in self.timeseries:
        for donor_idx, acceptor_idx, donor, acceptor, distance, angle in hframe:
            donor_resnm, donor_resid, donor_atom = parse_residue(donor)
            acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
            # generate unambiguous key for current hbond
            # (the donor_heavy_atom placeholder '?' is replaced later)
            hb_key = (
                donor_idx, acceptor_idx,
                donor_resnm, donor_resid, "?", donor_atom,
                acceptor_resnm, acceptor_resid, acceptor_atom)
            hbonds[hb_key] += 1

    # build empty output table
    dtype = [
        ('donor_idx', int),
        ('acceptor_idx', int),
        ('donor_resnm', 'S4'),
        ('donor_resid', int),
        ('donor_heavy_atom', 'S4'),
        ('donor_atom', 'S4'),
        ('acceptor_resnm', 'S4'),
        ('acceptor_resid', int),
        ('acceptor_atom', 'S4'),
        ('frequency', float),
    ]
    out = numpy.empty((len(hbonds),), dtype=dtype)

    # float because of division later
    tsteps = float(len(self.timesteps))
    for cursor, (key, count) in enumerate(hbonds.items()):
        out[cursor] = key + (count / tsteps,)

    # return array as recarray
    # The recarray has not been used within the function, because accessing
    # the elements of a recarray (3.65 us) is much slower than accessing those
    # of a ndarray (287 ns).
    r = out.view(numpy.recarray)

    # patch in donor heavy atom names (replaces '?' in the key)
    # NOTE(review): the ``idx - 1`` presumably converts a 1-based donor_idx to
    # the 0-based index used by the lookup table -- confirm.
    h2donor = self._donor_lookup_table_byindex()
    r.donor_heavy_atom[:] = [h2donor[idx - 1] for idx in r.donor_idx]
    return r
def check_parse_residue(rstring, residue):
    """Assert that ``util.parse_residue(rstring)`` equals *residue*."""
    parsed = util.parse_residue(rstring)
    assert_equal(parsed, residue)