class TestCustomFeature(unittest.TestCase):
    """Tests registering a user-supplied function as a featurizer feature."""

    def setUp(self):
        """Create a fresh featurizer, the test trajectory and the custom-function arguments."""
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)
        self.pairs = [[0, 1], [0, 2], [1, 2]]  # some distances
        self.means = [.5, .75, 1.0]  # bogus means
        # bogus transformation, projects from 3 distances to 2 components
        self.U = np.array([[0, 1], [1, 0], [1, 1]])

    def _add_custom_func(self):
        """Register the custom function on ``self.feat`` with the fixture arguments."""
        self.feat.add_custom_func(
            some_call_to_mdtraj_some_operations_some_linalg,
            self.U.shape[1],
            self.pairs, self.means, self.U)

    def test_some_feature(self):
        """The featurizer output must match a direct call of the custom function."""
        self._add_custom_func()
        Y_custom_feature = self.feat.transform(self.traj)

        # Directly call the function
        Y_function = some_call_to_mdtraj_some_operations_some_linalg(
            self.traj, self.pairs, self.means, self.U)

        # unittest assertions are used instead of bare ``assert`` so the check
        # is not compiled away when the suite runs under ``python -O``.
        self.assertTrue(np.allclose(Y_custom_feature, Y_function))

    def test_describe(self):
        """Smoke test: describing a featurizer with a custom function must not raise."""
        self._add_custom_func()
        self.feat.describe()

    def test_dimensionality(self):
        """The reported dimension equals the number of columns of ``U``."""
        self._add_custom_func()
        self.assertEqual(self.feat.dimension(), self.U.shape[1])
def setUp(self):
    """Load the test trajectory and build a fresh featurizer plus shared test parameters."""
    self.pdbfile = pdbfile
    self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
    self.feat = MDFeaturizer(self.pdbfile)
    self.atol = 1e-5
    self.ref_frame = 0
    # Floor division: with true division (Python 3) the stop value is a float,
    # so np.arange would return a float array that cannot be used as indices.
    self.atom_indices = np.arange(0, self.traj.n_atoms // 2)
def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
    """Set up the reader from trajectory file(s) and a topology file or a featurizer.

    ``trajectories`` may be a single path or a list of paths. When both
    ``topologyfile`` and ``featurizer`` are given, a warning is logged and the
    featurizer (and its topology file) is used.
    """
    have_source = (topologyfile is not None) or (featurizer is not None)
    assert have_source, \
        "Needs either a topology file or a featurizer for instantiation"

    super(FeatureReader, self).__init__(chunksize=chunksize)

    # a single path is wrapped into a one-element list
    self.trajfiles = [trajectories] if isinstance(trajectories, string_types) else trajectories
    self.topfile = topologyfile

    if topologyfile and featurizer:
        self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                             "Only featurizer gets respected in this case.")

    if featurizer:
        # the featurizer's own topology file overrides the one passed in
        self.featurizer = featurizer
        self.topfile = featurizer.topologyfile
    else:
        self.featurizer = MDFeaturizer(topologyfile)

    # topology and trajectory files have to be compatible with each other
    self._assert_toptraj_consistency()

    # primary iterator, current lag time, and time-lagged iterator
    self._mditer = None
    self._curr_lag = 0
    self._mditer2 = None

    self.__set_dimensions_and_lengths()
    self._parametrized = True
def setUp(self):
    """Prepare featurizer, trajectory and the arguments handed to the custom function."""
    self.feat = MDFeaturizer(pdbfile)
    self.traj = mdtraj.load(xtcfile, top=pdbfile)

    # atom index pairs for which distances are computed
    self.pairs = [[0, 1],
                  [0, 2],
                  [1, 2]]

    # made-up mean values, one per pair
    self.means = [0.5, 0.75, 1.0]

    # made-up linear map taking the 3 distances to 2 components
    projection_rows = [[0, 1],
                       [1, 0],
                       [1, 1]]
    self.U = np.array(projection_rows)
def test_backbone_dihedrals_deg(self):
    """Backbone torsions requested in degrees must lie within [-180, 180]."""
    self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
    self.feat.add_backbone_torsions(deg=True)

    traj = mdtraj.load(self.asn_leu_pdbfile)
    Y = self.feat.transform(traj)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    self.assertTrue(np.all(Y >= -180.0))
    self.assertTrue(np.all(Y <= 180.0))

    # one description entry per output dimension
    desc = self.feat.describe()
    self.assertEqual(len(desc), self.feat.dimension())
def test_ca_distances_with_all_atom_geometries(self):
    """Ca distances from the all-atom file must match distances from the Ca-only file."""
    all_atom_feat = MDFeaturizer(pdbfile_ops_aa)
    all_atom_feat.add_distances_ca(excluded_neighbors=0)
    all_atom_traj = mdtraj.load(pdbfile_ops_aa)
    D_aa = all_atom_feat.transform(all_atom_traj)

    # Reference: distances over every atom of the Ca-only structure
    ca_only_feat = MDFeaturizer(pdbfile_ops_Ca)
    every_atom = np.arange(ca_only_feat.topology.n_atoms)
    ca_only_feat.add_distances(every_atom)
    ca_only_traj = mdtraj.load(pdbfile_ops_Ca)
    D_ca = ca_only_feat.transform(ca_only_traj)

    assert np.allclose(D_aa, D_ca)
def test_backbone_dihedrials_chi(self):
    """Chi1 torsions (default units) must lie within [-pi, pi]."""
    self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
    self.feat.add_chi1_torsions()

    traj = mdtraj.load(self.asn_leu_pdbfile)
    Y = self.feat.transform(traj)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    self.assertTrue(np.all(Y >= -np.pi))
    self.assertTrue(np.all(Y <= np.pi))

    # one description entry per output dimension
    desc = self.feat.describe()
    self.assertEqual(len(desc), self.feat.dimension())
def test_backbone_dihedrals_cossin(self):
    """Backbone torsions with ``cossin=True`` yield a cos and a sin column per angle."""
    self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
    self.feat.add_backbone_torsions(cossin=True)

    traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
    Y = self.feat.transform(traj)

    self.assertEqual(Y.shape, (len(traj), 3 * 4))  # (3 phi + 3 psi)*2 [cos, sin]

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    # NOTE(review): the +/-pi bound is kept from the original test; cos/sin
    # values would also satisfy the tighter bound +/-1 -- consider tightening.
    self.assertTrue(np.all(Y >= -np.pi))
    self.assertTrue(np.all(Y <= np.pi))

    desc = self.feat.describe()
    # the first two descriptions are expected to be the cos and the sin entry
    self.assertIn("COS", desc[0])
    self.assertIn("SIN", desc[1])
    self.assertEqual(len(desc), self.feat.dimension())
class TestStaticMethods(unittest.TestCase):
    """Checks the index-pair generation helper of the featurizer."""

    def setUp(self):
        self.feat = MDFeaturizer(pdbfile)

    def test_pairs(self):
        """``pairs`` over 5 indices must honour the ``excluded_neighbors`` setting."""
        n_at = 5

        # (excluded_neighbors, expected pairs), checked in this order
        cases = (
            (3, [0, 4]),
            (2, [[0, 3], [0, 4],
                 [1, 4]]),
            (1, [[0, 2], [0, 3], [0, 4],
                 [1, 3], [1, 4],
                 [2, 4]]),
            (0, [[0, 1], [0, 2], [0, 3], [0, 4],
                 [1, 2], [1, 3], [1, 4],
                 [2, 3], [2, 4],
                 [3, 4]]),
        )

        for n_excluded, expected in cases:
            pairs = self.feat.pairs(np.arange(n_at), excluded_neighbors=n_excluded)
            assert np.allclose(pairs, expected)
def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
    """Set up the reader from trajectory file(s) and a topology file or a featurizer.

    Parameters
    ----------
    trajectories : str or list of str
        path(s) to trajectory files; a single path is wrapped into a list
    topologyfile : str, optional
        path to the topology file; used to build an ``MDFeaturizer`` when no
        featurizer is given
    chunksize : int, optional
        forwarded to the base class constructor
    featurizer : optional
        featurizer to use; takes precedence over ``topologyfile``
    """
    assert (topologyfile is not None) or (featurizer is not None), \
        "Needs either a topology file or a featurizer for instantiation"

    # init with chunksize 100
    super(FeatureReader, self).__init__(chunksize=chunksize)
    self.data_producer = self

    # files
    # ``basestring`` only exists on Python 2; fall back to ``str`` so that a
    # single path does not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(trajectories, string_types):
        trajectories = [trajectories]
    self.trajfiles = trajectories
    self.topfile = topologyfile

    # featurizer
    if topologyfile and featurizer:
        self._logger.warning(
            "Both a topology file and a featurizer were given as arguments. "
            "Only featurizer gets respected in this case.")
    if not featurizer:
        self.featurizer = MDFeaturizer(topologyfile)
    else:
        self.featurizer = featurizer
        self.topfile = featurizer.topologyfile

    # iteration
    self._mditer = None
    # current lag time
    self._curr_lag = 0
    # time lagged iterator
    self._mditer2 = None

    # cache size
    self.in_memory = False
    self._Y = None

    # NOTE(review): method name spelling ("lenghts") kept as-is; presumably it
    # matches the method's definition in the class -- verify before renaming.
    self.__set_dimensions_and_lenghts()
    self._parametrized = True
def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
    """Initialise the reader with trajectory file(s) plus a topology file or a featurizer.

    Parameters
    ----------
    trajectories : str or list of str
        path(s) to trajectory files; a single path is wrapped into a list
    topologyfile : str, optional
        path to the topology file; used to build an ``MDFeaturizer`` when no
        featurizer is given
    chunksize : int, optional
        forwarded to the base class constructor
    featurizer : optional
        featurizer to use; takes precedence over ``topologyfile``
    """
    assert (topologyfile is not None) or (featurizer is not None), \
        "Needs either a topology file or a featurizer for instantiation"

    # init with chunksize 100
    super(FeatureReader, self).__init__(chunksize=chunksize)
    self.data_producer = self

    # files
    # Python 3 has no ``basestring``; resolve the text type once so the
    # single-path check works on both major versions.
    try:
        text_types = basestring
    except NameError:
        text_types = str
    if isinstance(trajectories, text_types):
        trajectories = [trajectories]
    self.trajfiles = trajectories
    self.topfile = topologyfile

    # featurizer
    if topologyfile and featurizer:
        self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                             "Only featurizer gets respected in this case.")
    if not featurizer:
        self.featurizer = MDFeaturizer(topologyfile)
    else:
        self.featurizer = featurizer
        self.topfile = featurizer.topologyfile

    # iteration
    self._mditer = None
    # current lag time
    self._curr_lag = 0
    # time lagged iterator
    self._mditer2 = None

    # cache size
    self.in_memory = False
    self._Y = None

    # NOTE(review): the "lenghts" spelling is kept on purpose; presumably it
    # matches the method's definition in the class -- verify before renaming.
    self.__set_dimensions_and_lenghts()
    self._parametrized = True
class TestFeaturizer(unittest.TestCase):
    """Exercises the individual feature types of ``MDFeaturizer`` on the test trajectory."""

    def setUp(self):
        """Load the test trajectory and create a fresh featurizer for every test."""
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)

    def _selected_pairs(self):
        """Build pairs from a fixed atom selection and check them against the expected list.

        Returns the expected pair array, which the caller uses both to add the
        feature and to compute the reference values.
        """
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        return pairs_expected

    def _pair_distances(self, pairs):
        """Reference (non-periodic) Euclidean distances for ``pairs`` over all frames."""
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        return np.sqrt(np.sum((X - Y) ** 2, axis=2))

    @staticmethod
    def _assert_within(values, bound):
        """Assert that every entry of ``values`` lies in ``[-bound, bound]``."""
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert (np.all(values >= -bound))
        assert (np.all(values <= bound))

    def test_select_backbone(self):
        # smoke test: only checks that the selection call does not raise
        self.feat.select_Backbone()

    def test_select_all(self):
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz, (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.map(self.traj)))

    def test_select(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :], (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.map(self.traj)))

    def test_distances(self):
        pairs = self._selected_pairs()
        # unperiodic distances such that we can compare
        self.feat.add_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs.shape[0])
        D = self._pair_distances(pairs)
        assert (np.allclose(D, self.feat.map(self.traj)))

    def test_inverse_distances(self):
        pairs = self._selected_pairs()
        # unperiodic distances such that we can compare
        self.feat.add_inverse_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs.shape[0])
        Dinv = 1.0 / self._pair_distances(pairs)
        assert (np.allclose(Dinv, self.feat.map(self.traj)))

    def test_ca_distances(self):
        sel = self.feat.select_Ca()
        # should be all for this Ca-traj
        assert (np.all(sel == np.arange(self.traj.n_atoms)))
        pairs = self.feat.pairs(sel)
        # unperiodic distances such that we can compare
        self.feat.add_distances_ca(periodic=False)
        assert (self.feat.dimension() == pairs.shape[0])
        D = self._pair_distances(pairs)
        assert (np.allclose(D, self.feat.map(self.traj)))

    def test_contacts(self):
        pairs = self._selected_pairs()
        # unperiodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False)
        assert (self.feat.dimension() == pairs.shape[0])
        D = self._pair_distances(pairs)
        # reference contact map: 1.0 wherever the distance is within the threshold
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.map(self.traj)))

    def test_angles(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        self._assert_within(self.feat.map(self.traj), np.pi)

    def test_angles_deg(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        self._assert_within(self.feat.map(self.traj), 180.0)

    def test_dihedrals(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        self._assert_within(self.feat.map(self.traj), np.pi)

    def test_dihedrals_deg(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        self._assert_within(self.feat.map(self.traj), 180.0)

    def test_backbone_dihedrals(self):
        # TODO: test me
        pass

    def test_backbone_dihedrals_deg(self):
        # TODO: test me
        pass

    def test_custom_feature(self):
        # TODO: test me
        pass
def testAddFeaturesWithDuplicates(self):
    """Add several features twice (e.g. with the same indices) and check
    that the repeated additions do not increase the number of active features."""
    featurizer = MDFeaturizer(pdbfile)

    def expect_active(count):
        # number of features currently registered on the featurizer
        self.assertEqual(len(featurizer.active_features), count)

    angle_triples = [[0, 1, 2], [0, 3, 4]]
    featurizer.add_angles(angle_triples)
    featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
    expect_active(1)

    featurizer.add_backbone_torsions()
    expect_active(2)
    featurizer.add_backbone_torsions()
    expect_active(2)

    featurizer.add_contacts([[0, 1], [0, 3]])
    expect_active(3)
    featurizer.add_contacts([[0, 1], [0, 3]])
    expect_active(3)

    # try to fool it with ca selection
    ca_pairs = featurizer.pairs(featurizer.select_Ca())
    featurizer.add_distances(ca_pairs)
    expect_active(4)
    featurizer.add_distances_ca()
    expect_active(4)

    featurizer.add_inverse_distances([[0, 1], [0, 3]])
    expect_active(5)

    featurizer.add_distances([[0, 1], [0, 3]])
    expect_active(6)
    featurizer.add_distances([[0, 1], [0, 3]])
    expect_active(6)

    def my_func(x):
        return x - 1

    def foo(x):
        return x - 1

    my_feature = CustomFeature(my_func)
    my_feature.dimension = 3
    featurizer.add_custom_feature(my_feature)
    expect_active(7)
    featurizer.add_custom_feature(my_feature)
    expect_active(7)

    # since myfunc and foo are different functions, it should be added
    foo_feat = CustomFeature(foo, dim=3)
    featurizer.add_custom_feature(foo_feat)
    expect_active(8)
class FeatureReader(ReaderInterface):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files (a single path string is also accepted)

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        chunk size, forwarded to the base class and used when iterating files

    featurizer: featurizer object, optional
        if given, it is used instead of building one from `topologyfile`

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> X = reader.get_output()

    """

    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)

        # files
        if isinstance(trajectories, string_types):
            trajectories = [trajectories]
        self.trajfiles = trajectories
        self.topfile = topologyfile

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                 "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

        # iteration
        self._mditer = None
        # current lag time
        self._curr_lag = 0
        # time lagged iterator
        self._mditer2 = None

        self.__set_dimensions_and_lengths()
        self._parametrized = True

    def __set_dimensions_and_lengths(self):
        """Store the number of trajectories and append each trajectory's length to ``_lengths``.

        Raises ValueError when the list of trajectory files is empty.
        """
        self._ntraj = len(self.trajfiles)

        # lookups pre-computed lengths, or compute it on the fly and store it in db.
        if config['use_trajectory_lengths_cache'] == 'True':
            from pyemma.coordinates.data.traj_info_cache import TrajectoryInfoCache
            for traj in self.trajfiles:
                self._lengths.append(TrajectoryInfoCache[traj])
        else:
            for traj in self.trajfiles:
                with mdtraj.open(traj, mode='r') as fh:
                    self._lengths.append(len(fh))

        # number of trajectories/data sets
        if self._ntraj == 0:
            raise ValueError("no valid data")

            # note: dimension is a custom impl in this class

    def describe(self):
        """
        Returns a description of this transformer

        :return: list of strings; a header line followed by the featurizer's description
        """
        return ["Feature reader with following features"] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: 3 * n_atoms when no feature is active, else the featurizer's dimension
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _create_iter(self, filename, skip=0, stride=1, atom_indices=None):
        """Open a chunked iterator over ``filename`` using this reader's chunksize and topology."""
        return patches.iterload(filename, chunk=self.chunksize,
                                top=self.topfile, skip=skip, stride=stride, atom_indices=atom_indices)

    def _close(self):
        """Close the primary and the lagged iterator if they are open (best effort)."""
        try:
            if self._mditer:
                self._mditer.close()
            if self._mditer2:
                self._mditer2.close()
        except Exception:
            # Closing stays best-effort, but KeyboardInterrupt / SystemExit are
            # no longer swallowed as they were by the former bare ``except``.
            self._logger.exception("something went wrong closing file handles")

    def _reset(self, context=None):
        """
        resets the chunk reader
        """
        self._itraj = 0
        self._curr_lag = 0
        if len(self.trajfiles) >= 1:
            self._t = 0
            if context and not context.uniform_stride:
                # random access: start at the smallest requested trajectory index
                self._itraj = min(context.traj_keys)
                self._mditer = self._create_iter(
                    self.trajfiles[self._itraj], stride=context.ra_indices_for_traj(self._itraj)
                )
            else:
                self._mditer = self._create_iter(self.trajfiles[0], stride=context.stride if context else 1)

    def _next_chunk(self, context=None):
        """
        gets the next chunk. If lag > 0, we open another iterator with same chunk
        size and advance it by one, as soon as this method is called with a lag > 0.

        :return: a feature mapped vector X, or (X, Y) if lag > 0
        """
        chunk = next(self._mditer)
        shape = chunk.xyz.shape

        if context.lag > 0:
            if not context.uniform_stride:
                raise ValueError("random access stride with lag not supported")
            if self._curr_lag == 0:
                # lag time or trajectory index changed, so open lagged iterator
                if __debug__:
                    self._logger.debug("open time lagged iterator for traj %i with lag %i"
                                       % (self._itraj, context.lag))
                self._curr_lag = context.lag
                self._mditer2 = self._create_iter(self.trajfiles[self._itraj],
                                                  skip=self._curr_lag,
                                                  stride=context.stride)
            try:
                adv_chunk = next(self._mditer2)
            except StopIteration:
                # When _mditer2 ran over the trajectory end, return empty chunks.
                adv_chunk = mdtraj.Trajectory(np.empty((0, shape[1], shape[2]), np.float32), chunk.topology)
            except RuntimeError as e:
                if "seek error" in str(e):
                    raise RuntimeError("Trajectory %s too short for lag time %i" %
                                       (self.trajfiles[self._itraj], context.lag))
                # Any other RuntimeError must propagate; swallowing it would
                # leave ``adv_chunk`` unbound and fail later with a NameError.
                raise

        self._t += shape[0]

        if (self._t >= self.trajectory_length(self._itraj, stride=context.stride) and
                self._itraj < len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing current trajectory "%s"'
                                   % self.trajfiles[self._itraj])
            self._close()

            self._t = 0
            self._itraj += 1
            if not context.uniform_stride:
                # skip trajectories that were not requested
                while self._itraj not in context.traj_keys and self._itraj < self.number_of_trajectories():
                    self._itraj += 1
                self._mditer = self._create_iter(
                    self.trajfiles[self._itraj], stride=context.ra_indices_for_traj(self._itraj)
                )
            else:
                self._mditer = self._create_iter(self.trajfiles[self._itraj], stride=context.stride)
            # we open self._mditer2 only if requested due lag parameter!
            self._curr_lag = 0

        if not context.uniform_stride:
            traj_len = context.ra_trajectory_length(self._itraj)
        else:
            # NOTE(review): no stride is passed here, unlike the check above -- confirm intended
            traj_len = self.trajectory_length(self._itraj)
        if self._t >= traj_len and self._itraj == len(self.trajfiles) - 1:
            if __debug__:
                self._logger.debug('closing last trajectory "%s"' % self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()

        # map data
        if context.lag == 0:
            if len(self.featurizer.active_features) == 0:
                # no features selected: flatten the raw coordinates per frame
                shape_2d = (shape[0], shape[1] * shape[2])
                return chunk.xyz.reshape(shape_2d)
            else:
                return self.featurizer.transform(chunk)
        else:
            if len(self.featurizer.active_features) == 0:
                shape_Y = adv_chunk.xyz.shape

                X = chunk.xyz.reshape((shape[0], shape[1] * shape[2]))
                Y = adv_chunk.xyz.reshape((shape_Y[0], shape_Y[1] * shape_Y[2]))
            else:
                X = self.featurizer.transform(chunk)
                Y = self.featurizer.transform(adv_chunk)
            return X, Y

    def parametrize(self, stride=1):
        """Map the data to memory (with the given stride) if ``in_memory`` is set."""
        if self.in_memory:
            self._map_to_memory(stride)

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the trajfiles of the reader have the same n_atoms"""
        # only the first frame of the first trajectory file is inspected
        traj = mdtraj.load_frame(self.trajfiles[0], index=0, top=self.topfile)
        desired_n_atoms = self.featurizer.topology.n_atoms
        assert traj.xyz.shape[1] == desired_n_atoms, "Mismatch in the number of atoms between the topology" \
                                                     " and the first trajectory file, %u vs %u"% \
                                                     (desired_n_atoms, traj.xyz.shape[1])
class FeatureReader(ReaderInterface):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files (a single path string is also accepted)

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        chunk size, forwarded to the base class and used when iterating files

    featurizer: featurizer object, optional
        if given, it is used instead of building one from `topologyfile`

    Examples
    --------
    Iterator access:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> chunks = []
    >>> for itraj, X in reader:
    ...     chunks.append(X)

    Extract backbone torsion angles of protein during feature reading:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> reader.featurizer.add_backbone_torsions()
    >>> X = reader.get_output()

    """

    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        # init with chunksize 100
        super(FeatureReader, self).__init__(chunksize=chunksize)
        self.data_producer = self

        # files
        # ``basestring`` only exists on Python 2; fall back to ``str`` so that a
        # single path does not raise NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        if isinstance(trajectories, string_types):
            trajectories = [trajectories]
        self.trajfiles = trajectories
        self.topfile = topologyfile

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning(
                "Both a topology file and a featurizer were given as arguments. "
                "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile

        # iteration
        self._mditer = None
        # current lag time
        self._curr_lag = 0
        # time lagged iterator
        self._mditer2 = None

        # cache size
        self.in_memory = False
        self._Y = None

        self.__set_dimensions_and_lenghts()
        self._parametrized = True

    # @classmethod
    # def init_from_featurizer(cls, trajectories, featurizer):
    #     if not isinstance(featurizer, MDFeaturizer):
    #         raise ValueError("given featurizer is not of type Featurizer, but is %s"
    #                          % type(featurizer))
    #     cls.featurizer = featurizer
    #     return cls(trajectories, featurizer.topologyfile)

    def __set_dimensions_and_lenghts(self):
        """Store the number of trajectories and append each trajectory's frame count to ``_lengths``.

        Every trajectory is iterated once in chunks to count its frames.
        Raises ValueError when the list of trajectory files is empty.
        """
        self._ntraj = len(self.trajfiles)

        # basic statistics
        for traj in self.trajfiles:
            sum_frames = sum(t.n_frames for t in self._create_iter(traj))
            self._lengths.append(sum_frames)

        # number of trajectories/data sets
        if self._ntraj == 0:
            raise ValueError("no valid data")

        # note: dimension is a custom impl in this class

    def describe(self):
        """
        Returns a description of this transformer

        :return: list of strings; a header line followed by the featurizer's description
        """
        return ["Feature reader with following features"
                ] + self.featurizer.describe()

    def parametrize(self, stride=1):
        """
        Parametrizes this transformer

        Only does work when ``in_memory`` is set, in which case the data is
        mapped to memory.

        :return:
        """
        if self.in_memory:
            self._map_to_memory(stride=stride)

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: 3 * n_atoms when no feature is active, else the featurizer's dimension
        """
        if len(self.featurizer.active_features) == 0:
            # special case: cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _get_memory_per_frame(self):
        """
        Returns the memory requirements per frame, in bytes

        :return: 4 bytes times the output dimension
        """
        return 4 * self.dimension()

    def _get_constant_memory(self):
        """
        Returns the constant memory requirements, in bytes

        :return: always 0
        """
        return 0

    def _map_to_memory(self, stride=1):
        """Read all trajectories chunk by chunk and write the output into ``self._Y``.

        Raises NotImplementedError for ``stride > 1``.
        """
        # TODO: stride is currently not implemented
        if stride > 1:
            raise NotImplementedError(
                'stride option for FeatureReader._map_to_memory is currently not implemented'
            )

        self._reset()
        # iterate over trajectories
        last_chunk = False
        itraj = 0
        while not last_chunk:
            last_chunk_in_traj = False
            t = 0
            while not last_chunk_in_traj:
                y = self._next_chunk()
                assert y is not None
                L = np.shape(y)[0]
                # last chunk in traj?
                last_chunk_in_traj = (t + L >= self.trajectory_length(itraj))
                # last chunk?
                last_chunk = (last_chunk_in_traj
                              and itraj >= self.number_of_trajectories() - 1)
                # write
                # NOTE(review): nothing in this class allocates self._Y (it is set
                # to None in __init__) -- presumably done elsewhere; verify.
                self._Y[itraj][t:t + L] = y
                # increment time
                t += L
            # increment trajectory
            itraj += 1

    def _create_iter(self, filename, skip=0, stride=1):
        """Open a chunked iterator over ``filename`` using this reader's chunksize and topology."""
        return patches.iterload(filename,
                                chunk=self.chunksize,
                                top=self.topfile,
                                skip=skip,
                                stride=stride)

    def _reset(self, stride=1):
        """
        resets the chunk reader
        """
        self._itraj = 0
        self._curr_lag = 0
        if len(self.trajfiles) >= 1:
            self._t = 0
            self._mditer = self._create_iter(self.trajfiles[0], stride=stride)

    def _next_chunk(self, lag=0, stride=1):
        """
        gets the next chunk. If lag > 0, we open another iterator with same chunk
        size and advance it by one, as soon as this method is called with a lag > 0.

        :return: a feature mapped vector X, or (X, Y) if lag > 0
        """
        # the builtin next() works on Python 2.6+ and 3, unlike iterator.next()
        chunk = next(self._mditer)
        shape = chunk.xyz.shape

        if lag > 0:
            if self._curr_lag == 0:
                # lag time or trajectory index changed, so open lagged iterator
                if __debug__:
                    # log the requested lag; self._curr_lag is still 0 at this point
                    self._logger.debug(
                        "open time lagged iterator for traj %i with lag %i"
                        % (self._itraj, lag))
                self._curr_lag = lag
                self._mditer2 = self._create_iter(self.trajfiles[self._itraj],
                                                  skip=self._curr_lag * stride,
                                                  stride=stride)
            try:
                adv_chunk = next(self._mditer2)
            except StopIteration:
                # When _mditer2 ran over the trajectory end, return empty chunks.
                adv_chunk = mdtraj.Trajectory(
                    np.empty((0, shape[1], shape[2]), np.float32),
                    chunk.topology)

        self._t += shape[0]

        if (self._t >= self.trajectory_length(self._itraj, stride=stride) and
                self._itraj < len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing current trajectory "%s"' %
                                   self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()
            self._t = 0
            self._itraj += 1
            self._mditer = self._create_iter(self.trajfiles[self._itraj],
                                             stride=stride)
            # we open self._mditer2 only if requested due lag parameter!
            self._curr_lag = 0

        if (self._t >= self.trajectory_length(self._itraj, stride=stride) and
                self._itraj == len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing last trajectory "%s"' %
                                   self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()

        # map data
        if lag == 0:
            if len(self.featurizer.active_features) == 0:
                # no features selected: flatten the raw coordinates per frame
                shape_2d = (shape[0], shape[1] * shape[2])
                return chunk.xyz.reshape(shape_2d)
            else:
                return self.featurizer.map(chunk)
        else:
            if len(self.featurizer.active_features) == 0:
                shape_Y = adv_chunk.xyz.shape

                X = chunk.xyz.reshape((shape[0], shape[1] * shape[2]))
                Y = adv_chunk.xyz.reshape(
                    (shape_Y[0], shape_Y[1] * shape_Y[2]))
            else:
                X = self.featurizer.map(chunk)
                Y = self.featurizer.map(adv_chunk)
            return X, Y
class TestFeaturizer(unittest.TestCase):
    """Tests the individual feature types offered by MDFeaturizer.

    Most tests add one feature to a fresh featurizer, transform the shared
    test trajectory and compare the result against a reference computed
    directly with numpy / mdtraj.
    """

    @classmethod
    def setUpClass(cls):
        """Write the ASN-LEU topology and a noisy trajectory to temp files."""
        import tempfile

        # mkstemp returns an open file descriptor; wrap it so it gets closed.
        fd, cls.asn_leu_pdbfile = tempfile.mkstemp(suffix=".pdb")
        with os.fdopen(fd, 'w') as fh:
            fh.write(asn_leu_pdb)

        # mkstemp instead of the race-prone, deprecated mktemp; the empty
        # file is overwritten by traj.save() below.
        fd, cls.asn_leu_traj = tempfile.mkstemp(suffix='.xtc')
        os.close(fd)

        # create traj for asn_leu: reference coordinates plus uniform noise
        n_frames = 4001
        traj = mdtraj.load(cls.asn_leu_pdbfile)
        ref = traj.xyz
        noise = np.random.random((n_frames, ref.shape[1], 3))
        traj.xyz = noise + ref
        traj.time = np.arange(n_frames)
        traj.save(cls.asn_leu_traj)

        super(TestFeaturizer, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Remove both temporary files created in setUpClass (best effort)."""
        for path in (cls.asn_leu_pdbfile, cls.asn_leu_traj):
            try:
                os.unlink(path)
            except EnvironmentError:
                pass
        super(TestFeaturizer, cls).tearDownClass()

    def setUp(self):
        """Create a fresh featurizer and load the shared test trajectory."""
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)
        self.atol = 1e-5
        self.ref_frame = 0
        # floor division keeps the atom indices integral under true division
        self.atom_indices = np.arange(0, self.traj.n_atoms // 2)

    def test_select_backbone(self):
        # just checks for exceptions
        self.feat.select_Backbone()

    def test_select_all(self):
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz, (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_select(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :], (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_inverse_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_inverse_distances(pairs, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        Dinv = 1.0 / np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(Dinv, self.feat.transform(self.traj)))

    def test_ca_distances(self):
        sel = self.feat.select_Ca()
        # should be all for this Ca-traj
        assert (np.all(sel == list(range(self.traj.n_atoms))))
        pairs = self.feat.pairs(sel, excluded_neighbors=0)
        # unperiodic distances such that we can compare
        self.feat.add_distances_ca(periodic=False, excluded_neighbors=0)
        assert (self.feat.dimension() == pairs.shape[0])
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_ca_distances_with_all_atom_geometries(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=0)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        feat_just_ca.add_distances(np.arange(feat_just_ca.topology.n_atoms))
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=2)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        ca_pairs = feat.pairs(feat_just_ca.select_Ca(), excluded_neighbors=2)
        feat_just_ca.add_distances(ca_pairs)
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False)
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_contacts_count_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        # unperiodic distances such that we can compare
        self.feat.add_contacts(pairs, threshold=0.5, periodic=False, count_contacts=True)
        # The dimensionality of the feature is now one
        assert (self.feat.dimension() == 1)
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        # Count the contacts
        C = C.sum(1, keepdims=True)
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_angles(self):
        sel = np.array([[1, 2, 5],
                        [1, 3, 8],
                        [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_angles_deg(self):
        sel = np.array([[1, 2, 5],
                        [1, 3, 8],
                        [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))

    def test_angles_cossin(self):
        sel = np.array([[1, 2, 5],
                        [1, 3, 8],
                        [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_dihedrals(self):
        sel = np.array([[1, 2, 5, 6],
                        [1, 3, 8, 9],
                        [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrals_deg(self):
        sel = np.array([[1, 2, 5, 6],
                        [1, 3, 8, 9],
                        [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrials_cossin(self):
        sel = np.array([[1, 2, 5, 6],
                        [1, 3, 8, 9],
                        [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_deg(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self.assertEqual(Y.shape, (len(traj), 3 * 4))  # (3 phi + 3 psi)*2 [cos, sin]
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_custom_feature(self):
        # TODO: test me
        pass

    def test_MinRmsd(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame])
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame])
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame],
                            atom_indices=self.atom_indices)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices_precentered(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame],
                            atom_indices=self.atom_indices, precentered=True)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_Residue_Mindist_Ca_all(self):
        self.feat.add_residue_mindist(scheme='ca')
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_all_threshold(self):
        threshold = .7
        self.feat.add_residue_mindist(scheme='ca', threshold=threshold)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1
        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array(self):
        contacts = np.array([[20, 10], [10, 0]])
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca', contacts=contacts)[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_One_Group(self):
        group0 = [0, 20, 30, 0]
        self.feat.add_group_mindist(group_definitions=[group0])  # Even with duplicates
        D = self.feat.transform(self.traj)
        dist_list = list(combinations(np.unique(group0), 2))
        Dref = mdtraj.compute_distances(self.traj, dist_list)
        assert np.allclose(D.squeeze(), Dref.min(1))
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2])
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups_threshold(self):
        threshold = .7
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    threshold=threshold)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1

        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_Some_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]

        group_pairs = np.array([[0, 1],
                                [2, 2],
                                [0, 2]])

        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    group_pairs=group_pairs)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(list(product(np.unique(group0), np.unique(group2))))
        dist_list_22 = np.array(list(combinations(np.unique(group2), 2)))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_22 = mdtraj.compute_distances(self.traj, dist_list_22).min(1)
        # column order follows group_pairs: (0, 1), (2, 2), (0, 2)
        Dref = np.vstack((Dref_01, Dref_22, Dref_02)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()
def setUp(self):
    """Give every test its own featurizer built from the shared topology file."""
    fresh_featurizer = MDFeaturizer(pdbfile)
    self.feat = fresh_featurizer
def test_labels(self):
    """Smoke test: describe() must not raise with one feature of each kind added."""
    feat = MDFeaturizer(pdbfile)

    # geometric features
    feat.add_angles([[1, 2, 3], [4, 5, 6]])
    feat.add_backbone_torsions()
    feat.add_contacts([[0, 1], [0, 3]])
    feat.add_distances([[0, 1], [0, 3]])
    feat.add_inverse_distances([[0, 1], [0, 3]])

    # user-supplied feature
    custom = CustomFeature(lambda x: x - 1, dim=3)
    feat.add_custom_feature(custom)

    # rmsd and minimum-distance features
    feat.add_minrmsd_to_ref(pdbfile)
    feat.add_residue_mindist()
    feat.add_group_mindist([[0, 1], [0, 2]])

    feat.describe()
def testAddFeaturesWithDuplicates(self):
    """Add several features twice (e.g. same indices) and check that duplicates are rejected.

    ``expected_active`` tracks how many distinct features should be active;
    it is only incremented when a genuinely new feature is added.
    """
    featurizer = MDFeaturizer(pdbfile)
    expected_active = 1

    featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
    featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
    self.assertEqual(len(featurizer.active_features), expected_active)

    featurizer.add_contacts([[0, 1], [0, 3]])
    expected_active += 1
    self.assertEqual(len(featurizer.active_features), expected_active)
    featurizer.add_contacts([[0, 1], [0, 3]])
    self.assertEqual(len(featurizer.active_features), expected_active)

    # try to fool it with ca selection
    ca = featurizer.select_Ca()
    ca = featurizer.pairs(ca, excluded_neighbors=0)
    featurizer.add_distances(ca)
    expected_active += 1
    self.assertEqual(len(featurizer.active_features), expected_active)
    featurizer.add_distances_ca(excluded_neighbors=0)
    self.assertEqual(len(featurizer.active_features), expected_active)

    featurizer.add_inverse_distances([[0, 1], [0, 3]])
    expected_active += 1
    self.assertEqual(len(featurizer.active_features), expected_active)

    featurizer.add_distances([[0, 1], [0, 3]])
    expected_active += 1
    self.assertEqual(len(featurizer.active_features), expected_active)
    featurizer.add_distances([[0, 1], [0, 3]])
    self.assertEqual(len(featurizer.active_features), expected_active)

    def my_func(x):
        return x - 1

    def foo(x):
        return x - 1

    expected_active += 1
    my_feature = CustomFeature(my_func)
    my_feature.dimension = 3
    featurizer.add_custom_feature(my_feature)
    self.assertEqual(len(featurizer.active_features), expected_active)
    featurizer.add_custom_feature(my_feature)
    self.assertEqual(len(featurizer.active_features), expected_active)

    # since myfunc and foo are different functions, it should be added
    expected_active += 1
    foo_feat = CustomFeature(foo, dim=3)
    featurizer.add_custom_feature(foo_feat)
    self.assertEqual(len(featurizer.active_features), expected_active)

    # assertEqual throughout: the assertEquals alias is deprecated and was
    # removed in Python 3.12.
    expected_active += 1
    ref = mdtraj.load(xtcfile, top=pdbfile)
    featurizer.add_minrmsd_to_ref(ref)
    featurizer.add_minrmsd_to_ref(ref)
    self.assertEqual(len(featurizer.active_features), expected_active)

    expected_active += 1
    featurizer.add_minrmsd_to_ref(pdbfile)
    featurizer.add_minrmsd_to_ref(pdbfile)
    self.assertEqual(len(featurizer.active_features), expected_active)

    expected_active += 1
    featurizer.add_residue_mindist()
    featurizer.add_residue_mindist()
    self.assertEqual(len(featurizer.active_features), expected_active)

    expected_active += 1
    featurizer.add_group_mindist([[0, 1], [0, 2]])
    featurizer.add_group_mindist([[0, 1], [0, 2]])
    self.assertEqual(len(featurizer.active_features), expected_active)
def setUp(self):
    """Store the topology path, load the test trajectory and build a fresh featurizer."""
    topology_path = pdbfile
    self.pdbfile = topology_path
    self.traj = mdtraj.load(xtcfile, top=topology_path)
    self.feat = MDFeaturizer(topology_path)
def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
    """Ca distances from the all-atom file must match distances from the Ca-only file.

    Both sides exclude pairs within two neighbors.
    """
    n_excluded = 2

    # distances between Ca atoms, computed from the all-atom geometry
    all_atom_feat = MDFeaturizer(pdbfile_ops_aa)
    all_atom_feat.add_distances_ca(excluded_neighbors=n_excluded)
    all_atom_traj = mdtraj.load(pdbfile_ops_aa)
    D_aa = all_atom_feat.transform(all_atom_traj)

    # Create a reference from the Ca-only geometry
    ca_only_feat = MDFeaturizer(pdbfile_ops_Ca)
    ca_selection = ca_only_feat.select_Ca()
    reference_pairs = all_atom_feat.pairs(ca_selection, excluded_neighbors=n_excluded)
    ca_only_feat.add_distances(reference_pairs)
    ca_only_traj = mdtraj.load(pdbfile_ops_Ca)
    D_ca = ca_only_feat.transform(ca_only_traj)

    assert np.allclose(D_aa, D_ca)
def test_labels(self):
    """Smoke test: describe() must not raise once several feature kinds are added."""
    feat = MDFeaturizer(pdbfile)

    # geometric features
    feat.add_angles([[1, 2, 3], [4, 5, 6]])
    feat.add_backbone_torsions()
    feat.add_contacts([[0, 1], [0, 3]])
    feat.add_distances([[0, 1], [0, 3]])
    feat.add_inverse_distances([[0, 1], [0, 3]])

    # custom feature whose dimension is patched in after construction
    custom = CustomFeature(lambda x: x - 1)
    custom.dimension = lambda: 3
    feat.add_custom_feature(custom)

    feat.describe()
class FeatureReader(ReaderInterface):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: string or list of strings
        path(s) to trajectory file(s); a single path is wrapped into a list

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        chunk size handed to the base class and to the trajectory iterators

    featurizer: MDFeaturizer
        featurizer to use; if given, it takes precedence over `topologyfile`

    Examples
    --------
    Iterator access:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> chunks = []
    >>> for itraj, X in reader:
    ...     chunks.append(X)

    Extract backbone torsion angles of protein during feature reading:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> reader.featurizer.add_backbone_torsions()
    >>> X = reader.get_output()

    """

    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"
        super(FeatureReader, self).__init__(chunksize=chunksize)
        self.data_producer = self

        # basestring only exists on Python 2; fall back to str elsewhere
        try:
            string_types = basestring
        except NameError:
            string_types = str

        # files
        if isinstance(trajectories, string_types):
            trajectories = [trajectories]
        self.trajfiles = trajectories
        self.topfile = topologyfile

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                 "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile

        # iteration
        self._mditer = None
        # current lag time
        self._curr_lag = 0
        # time lagged iterator
        self._mditer2 = None

        # cache size
        self.in_memory = False
        self._Y = None

        self.__set_dimensions_and_lengths()
        self._parametrized = True

    def __set_dimensions_and_lengths(self):
        """Count the frames of every trajectory file and record the lengths.

        Raises
        ------
        ValueError
            if no trajectory files were given
        """
        self._ntraj = len(self.trajfiles)

        # number of trajectories/data sets
        if self._ntraj == 0:
            raise ValueError("no valid data")

        # basic statistics: every file is read once, chunk by chunk
        # NOTE(review): self._lengths is not created in this class; presumably
        # the base class initialises it - confirm
        for traj in self.trajfiles:
            sum_frames = sum(t.n_frames for t in self._create_iter(traj))
            self._lengths.append(sum_frames)

        # note: dimension is a custom impl in this class

    def describe(self):
        """
        Returns a description of this transformer

        :return: list of strings; a header followed by the featurizer's description
        """
        return ["Feature reader with following features"] + self.featurizer.describe()

    def parametrize(self, stride=1):
        """
        Parametrizes this transformer

        Only does work if :attr:`in_memory` is set, in which case all data is
        mapped to memory.

        :param stride: forwarded to :meth:`_map_to_memory`
        :return:
        """
        if self.in_memory:
            self._map_to_memory(stride=stride)

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: 3 * number of atoms if no feature is active, else the
            dimension reported by the featurizer
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _get_memory_per_frame(self):
        """
        Returns the memory requirements per frame, in bytes

        :return: 4 bytes per output dimension
        """
        return 4 * self.dimension()

    def _get_constant_memory(self):
        """
        Returns the constant memory requirements, in bytes

        :return: always 0
        """
        return 0

    def _map_to_memory(self, stride=1):
        """Read all trajectories chunk by chunk and store them in ``self._Y``.

        :param stride: only 1 is supported
        :raises NotImplementedError: if stride > 1
        """
        # TODO: stride is currently not implemented
        if stride > 1:
            raise NotImplementedError('stride option for FeatureReader._map_to_memory is currently not implemented')

        # NOTE(review): self._Y is only set to None in this class; presumably
        # it is allocated elsewhere before this method runs - confirm
        self._reset()
        # iterate over trajectories
        last_chunk = False
        itraj = 0
        while not last_chunk:
            last_chunk_in_traj = False
            t = 0
            while not last_chunk_in_traj:
                y = self._next_chunk()
                assert y is not None
                L = np.shape(y)[0]
                # last chunk in traj?
                last_chunk_in_traj = (t + L >= self.trajectory_length(itraj))
                # last chunk?
                last_chunk = (last_chunk_in_traj
                              and itraj >= self.number_of_trajectories() - 1)
                # write
                self._Y[itraj][t:t + L] = y
                # increment time
                t += L
            # increment trajectory
            itraj += 1

    def _create_iter(self, filename, skip=0, stride=1):
        """Open a chunked iterator over ``filename`` using the reader's topology and chunk size."""
        return patches.iterload(filename, chunk=self.chunksize,
                                top=self.topfile, skip=skip, stride=stride)

    def _reset(self, stride=1):
        """
        resets the chunk reader

        Rewinds to the first trajectory and forgets the current lag time.
        """
        self._itraj = 0
        self._curr_lag = 0
        if len(self.trajfiles) >= 1:
            self._t = 0
            self._mditer = self._create_iter(self.trajfiles[0], stride=stride)

    def _next_chunk(self, lag=0, stride=1):
        """
        gets the next chunk. If lag > 0, we open another iterator with same chunk
        size and advance it by one, as soon as this method is called with a lag > 0.

        :param lag: lag time; 0 disables the time-lagged iterator
        :param stride: stride used when (re)opening trajectory iterators
        :return: a feature mapped vector X, or (X, Y) if lag > 0
        """
        # builtin next() works with both Python 2 and Python 3 style iterators
        chunk = next(self._mditer)
        shape = chunk.xyz.shape

        if lag > 0:
            if self._curr_lag == 0:
                # lag time or trajectory index changed, so open lagged iterator
                if __debug__:
                    # log the requested lag; self._curr_lag is still 0 here
                    self._logger.debug("open time lagged iterator for traj %i with lag %i"
                                       % (self._itraj, lag))
                self._curr_lag = lag
                self._mditer2 = self._create_iter(self.trajfiles[self._itraj],
                                                  skip=self._curr_lag*stride, stride=stride)
            try:
                adv_chunk = next(self._mditer2)
            except StopIteration:
                # When _mditer2 ran over the trajectory end, return empty chunks.
                adv_chunk = mdtraj.Trajectory(np.empty((0, shape[1], shape[2]), np.float32),
                                              chunk.topology)

        self._t += shape[0]

        # End of a trajectory that is not the last one: switch to the next file.
        if (self._t >= self.trajectory_length(self._itraj, stride=stride) and
                self._itraj < len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing current trajectory "%s"'
                                   % self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()
            self._t = 0
            self._itraj += 1
            self._mditer = self._create_iter(self.trajfiles[self._itraj], stride=stride)
            # we open self._mditer2 only if requested due lag parameter!
            self._curr_lag = 0

        # Deliberately a second `if`, not `elif`: it is re-evaluated after a
        # possible switch to the next trajectory above.
        if (self._t >= self.trajectory_length(self._itraj, stride=stride) and
                self._itraj == len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing last trajectory "%s"'
                                   % self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()

        # map data
        if lag == 0:
            if len(self.featurizer.active_features) == 0:
                # no features selected: return flattened coordinates
                shape_2d = (shape[0], shape[1] * shape[2])
                return chunk.xyz.reshape(shape_2d)
            else:
                return self.featurizer.map(chunk)
        else:
            if len(self.featurizer.active_features) == 0:
                shape_Y = adv_chunk.xyz.shape

                X = chunk.xyz.reshape((shape[0], shape[1] * shape[2]))
                Y = adv_chunk.xyz.reshape((shape_Y[0], shape_Y[1] * shape_Y[2]))
            else:
                X = self.featurizer.map(chunk)
                Y = self.featurizer.map(adv_chunk)
            return X, Y