def __init__(self, S, atomindices=None, permuteindices=None):
    """Wrap the trajectory mapping ``S`` and build its RMSD data.

    Parameters
    ----------
    S : mapping
        Handed to the parent constructor; ``S['XYZList']`` supplies the
        coordinates.
    atomindices : iterable of int, optional
        Indices selected along the second axis of ``S['XYZList']``. When
        None, the full ``S['XYZList']`` is used.
    permuteindices : iterable of iterables, optional
        Index groups; they are flattened here.
    """
    super(LPTraj, self).__init__(S)
    # Use identity tests: comparing a NumPy array with ``!= None`` is an
    # elementwise comparison whose truth value is ambiguous.
    aidx = list(atomindices) if atomindices is not None else []
    # The flattened list is not used further in this constructor; it is kept
    # so that a malformed ``permuteindices`` still fails here as before.
    pidx = (list(itertools.chain(*permuteindices))
            if permuteindices is not None else [])
    if atomindices is None:
        self.TD = RMSD.TheoData(S['XYZList'])
    else:
        self.TD = RMSD.TheoData(S['XYZList'][:, np.array(aidx)])
def run(pdb, traj, atom_indices):
    """Return ``one_to_all`` distances between ``pdb`` (index 0) and ``traj``.

    Both inputs are prepared with an ``RMSD`` metric built from
    ``atom_indices`` before the distances are computed.
    """
    # Swap in a different metric here if RMSD is not what you want.
    rmsd_metric = RMSD(atom_indices)
    prepared_reference = rmsd_metric.prepare_trajectory(pdb)
    prepared_frames = rmsd_metric.prepare_trajectory(traj)
    return rmsd_metric.one_to_all(prepared_reference, prepared_frames, 0)
def run(project, pdb, atom_indices):
    """Compute distances from ``pdb`` to every trajectory in ``project``.

    Parameters
    ----------
    project : object
        Must expose ``n_trajs`` and ``load_traj(i)``.
    pdb : trajectory
        Reference structure; index 0 of its prepared form is used.
    atom_indices : array-like or None
        Passed to the ``RMSD`` constructor.

    Returns
    -------
    distances : np.ndarray, shape (n_trajs, longest_row)
        Row ``i`` holds the distances for trajectory ``i``; positions past
        the end of a shorter trajectory are filled with -1.
    """
    rmsd = RMSD(atom_indices)
    ppdb = rmsd.prepare_trajectory(pdb)

    # Collect each row first so the output width comes from the actual row
    # lengths. Sizing it from ``n_trajs`` breaks the row assignment as soon
    # as a trajectory has more frames than there are trajectories.
    rows = []
    for i in range(project.n_trajs):
        ptraj = rmsd.prepare_trajectory(project.load_traj(i))
        rows.append(rmsd.one_to_all(ppdb, ptraj, 0))

    width = max(len(row) for row in rows) if rows else 0
    distances = -1 * np.ones((project.n_trajs, width))
    for i, row in enumerate(rows):
        distances[i, 0:len(row)] = row

    return distances
def test_gpurmsd():
    """Check that GPURMSD and RMSD distances agree to 4 decimal places."""
    traj = Trajectory.load_trajectory_file(trj_path)

    # GPU side
    gpu_metric = GPURMSD()
    gpu_prepared = gpu_metric.prepare_trajectory(traj)
    gpu_metric._gpurmsd.print_params()
    gpu_distances = gpu_metric.one_to_all(gpu_prepared, gpu_prepared, 0)

    # CPU reference
    cpu_metric = RMSD()
    cpu_prepared = cpu_metric.prepare_trajectory(traj)
    cpu_distances = cpu_metric.one_to_all(cpu_prepared, cpu_prepared, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
def __init__(self):
    '''
    Build an empty Voronoi tessellation.

    Every attribute starts as None except ``metric``, which is ``RMSD(None)``.
    '''
    self.storage = self._generator = None
    self.metric = RMSD(None)
    self.atom_indices = self.snapshot_distances = self.snapshot_indices = None
def test_lprmsd():
    """Check LPRMSD (with and without ``altindices``) against plain RMSD."""
    trajectory = Trajectory.load_trajectory_file('trj0.lh5')
    atom_subset = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    def frame0_distances(metric):
        # Prepare the trajectory for this metric, then measure against index 0.
        prepared = metric.prepare_trajectory(trajectory)
        return metric.one_to_all(prepared, prepared, 0)

    dists = frame0_distances(LPRMSD(atomindices=atom_subset, debug=True))
    dists_alt = frame0_distances(
        LPRMSD(atomindices=atom_subset, altindices=atom_subset, debug=True))
    ref_dists = frame0_distances(RMSD(atomindices=atom_subset))

    npt.assert_array_almost_equal(dists, ref_dists)
    npt.assert_array_almost_equal(dists_alt, ref_dists)
def __init__(self, structure_or_filename, max_rmsd, atom_indices=None):
    """Set up a validator that uses an RMSD metric.

    Parameters
    ----------
    structure_or_filename : {msmbuilder.Trajectory, str}
        Reference structure, given either as a trajectory (only its first
        frame counts) or as the path of a loadable trajectory file.
    max_rmsd : float
        RMSD threshold.
    atom_indices : np.array [ndim=1, dtype=int]
        Atoms over which the RMSD is measured.
    """
    super(RMSDExplosionValidator, self).__init__(
        structure_or_filename, RMSD(atom_indices), max_rmsd)
class VoronoiTesselation(object):
    '''
    Hold the MSM clustering description and the associated state
    assignments to decide if a trajectory has hit a core.

    Notes
    -----
    ``storage`` (and ``atom_indices`` if a subset is wanted) must be set by
    the caller before the storage-based methods are used.
    '''

    def __init__(self):
        '''
        Create an empty Voronoi tessellation object.
        '''
        self.storage = None
        self._generator = None
        self.metric = RMSD(None)
        self.atom_indices = None
        # Per-snapshot caches filled by assign_storage().
        self.snapshot_distances = None
        self.snapshot_indices = None

    ################################################################################

    def update_cluster_from_storage(self):
        '''
        Update the set of generators from the associated trajectory storage.

        Notes
        -----
        Runs ``clustering.HybridKMedoids`` on all snapshot coordinates and
        stores the result in ``self.generators_indices`` and
        ``self.generators``.
        '''
        traj = self.storage.all_snapshot_coordinates_as_mdtraj(self.atom_indices)

        args = Object()
        args.hybrid_local_num_iters = 50
        args.hybrid_global_iters = 0
        args.hybrid_ignore_max_objective = False
        args.hybrid_too_close_cutoff = 0.0001
        # NOTE(review): n_centers is not set in __init__; it must be assigned
        # on the instance before this method is called.
        args.hybrid_num_clusters = self.n_centers
        args.hybrid_distance_cutoff = None

        ptrajs = None

        clusterer = clustering.HybridKMedoids(
            self.metric,
            trajectories=traj,
            prep_trajectories=ptrajs,
            k=args.hybrid_num_clusters,
            distance_cutoff=args.hybrid_distance_cutoff,
            local_num_iters=args.hybrid_local_num_iters,
            global_num_iters=args.hybrid_global_iters,
            too_close_cutoff=args.hybrid_too_close_cutoff,
            ignore_max_objective=args.hybrid_ignore_max_objective
        )

        gen_inds = clusterer.get_generator_indices()

        self.generators_indices = gen_inds
        self.generators = traj[gen_inds]

        return

    @property
    def size(self):
        '''
        Return the number of generators used in the tessellation.

        Returns
        -------
        length : int
            number of generators
        '''
        return len(self.generators)

    def _closest_generators(self, ptraj, pgens, n_frames):
        '''
        For each of ``n_frames`` prepared frames find the nearest generator.

        Returns
        -------
        assignments : numpy.array(n_frames, dtype='int')
            index of the generator with the smallest distance
        distances : numpy.array(n_frames, dtype='float32')
            that smallest distance
        '''
        assignments = -1 * np.ones(n_frames, dtype='int')
        distances = -1 * np.ones(n_frames, dtype='float32')

        for j in range(n_frames):
            d = self.metric.one_to_all(ptraj, pgens, j)
            assignments[j] = np.argmin(d)
            distances[j] = d[assignments[j]]

        return assignments, distances

    def assign_storage(self):
        '''
        Assign all snapshots in the associated trajectory storage to the
        generators.

        Notes
        -----
        The results are cached in ``snapshot_indices`` and
        ``snapshot_distances`` so that later lookups are fast.
        '''
        traj = self.storage.all_snapshot_coordinates_as_mdtraj(self.atom_indices)

        n_frames = len(traj)

        pgens = self.metric.prepare_trajectory(self.generators)
        ptraj = self.metric.prepare_trajectory(traj)

        assignments, distances = self._closest_generators(ptraj, pgens, n_frames)

        self.snapshot_indices = np.array(assignments, dtype='int')
        self.snapshot_distances = np.array(distances, dtype='float')

        return

    def assign_all_trajectories(self):
        '''
        Assign all trajectories in the associated trajectory storage to the
        generators.

        Returns
        -------
        clusterlist : list
            one entry of cached cluster IDs per item of
            ``storage.all_trajectory_indices()``

        Notes
        -----
        This needs assign_storage() to be run before!
        '''
        return [self.snapshot_indices[t] for t in self.storage.all_trajectory_indices()]

    def assign_index_trajectory(self, indices):
        '''
        Look up the cached cluster IDs for the snapshots with IDs ``indices``.

        Returns
        -------
        clusterlist : array of int or None
            cluster IDs, or None if assign_storage() has not filled the cache
        '''
        if self.snapshot_indices is not None:
            return self.snapshot_indices[indices]
        else:
            return None

    def assign_snapshot(self, snapshot):
        '''
        Assign a single snapshot to the cluster centers.

        Returns
        -------
        assignment : int
            cluster ID
        distance : float
            distance to the cluster generator in measure of the metric (RMSD)
        '''
        assignments, distances = self.assign(Trajectory([snapshot]))
        return assignments[0], distances[0]

    def assign(self, traj, recalc=False):
        '''
        Assign a Trajectory object to the cluster.

        Parameters
        ----------
        traj : Trajectory
            trajectory to be clustered
        recalc : bool
            if True, compute the assignment from the generators even when
            cached assignments exist (default False)

        Returns
        -------
        assignments : numpy.array(n_frames, dtype='int')
            array of cluster IDs
        distances : numpy.array(n_frames)
            distances to the cluster generator in measure of the metric (RMSD)
        '''
        n_frames = len(traj)

        # Honour ``recalc``: the cache is only used when the caller did not
        # ask for a fresh calculation.
        if not recalc and self.snapshot_indices is not None:
            # We do not check if the snapshot_indices are properly updated!
            # Checking might be too expensive
            indices = traj.indices()
            assignments = self.snapshot_indices[indices]
            distances = self.snapshot_distances[indices]
        else:
            pgens = self.metric.prepare_trajectory(self.generators)
            ptraj = self.metric.prepare_trajectory(
                traj.subset(self.atom_indices).md())
            assignments, distances = self._closest_generators(ptraj, pgens, n_frames)

        return assignments, distances
def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
                     max_rmsd, min_gens, center_conformations, memory_check,
                     omp_parallel_rmsd=True):
    """
    Merge the XTC files found in a CLONE directory into one LH5 trajectory.

    Depending on ``self.memory`` the frames are either written to a new
    ``trj<trajectory_number>.lh5`` file in ``output_dir`` or appended to the
    file recorded for ``clone_dir`` by an earlier conversion. On success
    ``self.memory[clone_dir]`` is set to ``[output_file_path,
    num_xtcs_processed]``. Every skip condition logs a message and returns
    early without touching ``self.memory``.

    Parameters
    ----------
    clone_dir : str
        the directory in which the xtc files are found. All of the xtc files
        in this directory are joined together to make a single trajectory
        (.lh5) output file
    output_dir : str
        directory where the outputted files will be placed
    trajectory_number : int
        A unique number for this trajectory. This number is used in
        constructing the filename to write the outputted .lh5 trajectory to,
        and thus must be unique
    stride : int
        Subsample by only considering every Nth snapshot.
    max_rmsd : {number, None}
        if this value is not None, calculate the RMSD to the pdb_file from
        each snapshot and reject trajectories which have snapshots with RMSD
        greater than max_rmsd. If None, no check is performed
    min_gens : int
        Trajectories whose number of XTC files to convert is less than or
        equal to `min_gens` are skipped.
    center_conformations : bool
        center conformations before saving.
    memory_check : bool
        if true, uses the memory dictionary to do an update rather than a
        complete re-convert.
    omp_parallel_rmsd : bool
        Passed as ``omp_parallel`` to the RMSD metric used for the max_rmsd
        check

    Returns
    -------
    None
    """

    xtc_files = self.list_xtcs_in_dir(clone_dir)

    # Ensure that we're only joining contiguously numbered xtc files -- starting at 0 --
    # into a trajectory. If there are gaps in the xtc files in the directory, we only
    # want to use the ones such that they are contiguously numbered
    i = 0  # rebound by the loop below
    for i, filename in enumerate(xtc_files):
        if self.integer_component(filename) != i:
            logger.error(
                "Found discontinuity in xtc numbering - check data in %s",
                clone_dir)
            # keep only the files before the first gap
            xtc_files = xtc_files[0:i]
            break

    # check the memory object to see which xtc files have already been converted, and
    # exclude those from this conversion
    if memory_check:
        if clone_dir in self.memory.keys():
            previous_convert_exists = True
            num_xtcs_converted = self.memory[clone_dir][1]
            if len(xtc_files) == num_xtcs_converted:  # if we have converted everything,
                logger.info(
                    "Already converted all files in %s, skipping...",
                    clone_dir)
                return  # just bail out
            else:
                # only the files not covered by the recorded count remain
                xtc_files = xtc_files[num_xtcs_converted:]
        else:
            previous_convert_exists = False
    else:
        previous_convert_exists = False

    xtc_file_paths = [os.path.join(clone_dir, f) for f in xtc_files]

    logger.info("Processing %d xtc files in clone_dir = %s", len(xtc_files),
                clone_dir)

    # Note the comparison is <=, so exactly min_gens files is also skipped.
    if len(xtc_files) <= min_gens:
        logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
        logger.info("Too few xtc files (generations).")
        return

    try:
        # [this should check for and discard overlapping snapshots]
        trajectory = Trajectory.load_from_xtc(
            xtc_file_paths,
            PDBFilename=self.pdb_topology,
            discard_overlapping_frames=True)
    except IOError as e:
        logger.error(
            "IOError (%s) when processing trajectory in clone_dir = %s", e,
            clone_dir)
        logger.error(
            "Attempting rescue by disregarding final frame, which is often")
        logger.error("the first/only frame to be corrupted")

        # with a single file there is nothing left once the last one is dropped
        if len(xtc_file_paths) == 1:
            logger.error(
                "Didn't find any other frames in %s, continuing...",
                clone_dir)
            return

        try:
            # retry without the last xtc file
            # NOTE(review): this retry does not pass
            # discard_overlapping_frames=True — confirm that is intended.
            trajectory = Trajectory.load_from_xtc(
                xtc_file_paths[0:-1], PDBFilename=self.pdb_topology)
        except IOError:
            logger.error(
                "Unfortunately, the error remained even after ignoring the final frame."
            )
            logger.error("Skipping the trajectory in clone_dir = %s",
                         clone_dir)
            return
        else:
            logger.error(
                "Sucessfully recovered from IOError by disregarding final frame."
            )

    if max_rmsd is not None:
        # AtomID values are shifted down by one to get the indices for RMSD
        atomindices = [int(i) - 1 for i in trajectory['AtomID']]
        rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
        ppdb = rmsdmetric.prepare_trajectory(
            Trajectory.load_trajectory_file(self.pdb_topology))
        ptraj = rmsdmetric.prepare_trajectory(trajectory)
        rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

        if max(rmsds) > max_rmsd:
            # NOTE(review): argmax is not defined in this function; presumably
            # imported at module level — confirm.
            logger.warning("Snapshot %d RMSD %f > the %f cutoff",
                           argmax(rmsds), max(rmsds), max_rmsd)
            logger.warning("Dropping trajectory")
            return

    if center_conformations:
        RMSD.TheoData.centerConformations(trajectory["XYZList"])

    # if we are adding to a previous trajectory, we have to load that traj up and extend it
    if previous_convert_exists:
        output_filename = self.memory[clone_dir][0]
        output_file_path = output_filename
        logger.info("Extending: %s", output_filename)
        assert os.path.exists(output_filename)

        # load the traj and extend it [this should check for and discard overlapping snapshots]
        Trajectory.append_frames_to_file(output_filename,
                                         trajectory['XYZList'][::stride],
                                         discard_overlapping_frames=True)

        # NOTE(review): after the rescue path above the last xtc file was not
        # loaded, yet len(xtc_file_paths) still counts it — confirm intended.
        num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]

    # if we are not adding to a traj, then we create a new one
    else:
        output_filename = 'trj%s.lh5' % trajectory_number
        output_file_path = os.path.join(output_dir, output_filename)

        if os.path.exists(output_file_path):
            logger.info("The file name %s already exists. Skipping it.",
                        output_file_path)
            return

        # stride and discard by snapshot
        trajectory['XYZList'] = trajectory['XYZList'][::stride]
        trajectory.save(output_file_path)

        num_xtcs_processed = len(xtc_file_paths)

    # log what we did into the memory object
    self.memory[clone_dir] = [output_file_path, num_xtcs_processed]

    return
target = c_xyzlist[which, :, :] progressive = False for i in range(1, len(xyzlist)): if progressive: target = c_xyzlist[i - i] rmsd, operator = kabsch(c_xyzlist[i], target, operator=True) c_xyzlist[i] = operator(c_xyzlist[i]) return c_xyzlist if __name__ == '__main__': "Some test code" N = 40 query = np.arange(N)[:, np.newaxis] * np.random.randn(N, 3) target = np.arange(N)[:, np.newaxis] * np.random.randn(N, 3) dist, op = kabsch(query, target) print('my rmsd ', dist) from msmbuilder.metrics import RMSD _rmsdcalc = RMSD() t0 = RMSD.TheoData(query[np.newaxis, :, :]) t1 = RMSD.TheoData(target[np.newaxis, :, :]) print('msmbuilder rmsd', _rmsdcalc.one_to_all(t0, t1, 0)[0]) print(np.sqrt(np.sum(np.square(target - op(query))) / N))
def cpudist(t):
    """Return RMSD ``one_to_all`` distances of ``t`` against its own index 0."""
    metric = RMSD()
    prepared = metric.prepare_trajectory(t)
    return metric.one_to_all(prepared, prepared, 0)
else: target = c_xyzlist[which, :, :] progressive = False for i in range(1, len(xyzlist)): if progressive: target = c_xyzlist[i-i] rmsd, operator = kabsch(c_xyzlist[i], target, operator=True) c_xyzlist[i] = operator(c_xyzlist[i]) return c_xyzlist if __name__ == '__main__': "Some test code" N = 40 query = np.arange(N)[:, np.newaxis] * np.random.randn(N, 3) target = np.arange(N)[:, np.newaxis] * np.random.randn(N, 3) dist, op = kabsch(query, target) print 'my rmsd ', dist from msmbuilder.metrics import RMSD _rmsdcalc = RMSD() t0 = RMSD.TheoData(query[np.newaxis, :, :]) t1 = RMSD.TheoData(target[np.newaxis, :, :]) print 'msmbuilder rmsd', _rmsdcalc.one_to_all(t0, t1, 0)[0] print np.sqrt(np.sum(np.square(target - op(query))) / N)
def construct_metric(args):
    """Build the distance metric selected by ``args.metric``.

    Parameters
    ----------
    args : namespace
        Parsed command-line options. ``args.metric`` selects the branch
        ('rmsd', 'dihedral', 'contact', 'atompairs', 'custom'); any other
        value is offered to the metric plugins. Each branch reads its own
        additional attributes from ``args``.

    Returns
    -------
    metric : AbstractDistanceMetric

    Raises
    ------
    RuntimeError
        If ``args.metric`` is not built in and no plugin returns a metric.
    ValueError
        If the constructed object is not an AbstractDistanceMetric.
    """
    if args.metric == 'rmsd':
        if args.rmsd_atom_indices != 'all':
            # builtin int/float replace the removed np.int/np.float aliases
            atom_indices = np.loadtxt(args.rmsd_atom_indices, int)
        else:
            atom_indices = None
        metric = RMSD(atom_indices)

    elif args.metric == 'dihedral':
        metric = Dihedral(metric=args.dihedral_metric,
                          p=args.dihedral_p,
                          angles=args.dihedral_angles)

    elif args.metric == 'contact':
        if args.contact_which != 'all':
            contact_which = np.loadtxt(args.contact_which, int)
        else:
            contact_which = 'all'

        if args.contact_cutoff_file is not None:
            contact_cutoff = np.loadtxt(args.contact_cutoff_file, float)
        elif args.contact_cutoff is not None:
            contact_cutoff = float(args.contact_cutoff)
        else:
            contact_cutoff = None

        # A negative cutoff selects the continuous contact metric.
        if contact_cutoff is not None and contact_cutoff < 0:
            metric = ContinuousContact(contacts=contact_which,
                                       scheme=args.contact_scheme)
        else:
            metric = BooleanContact(contacts=contact_which,
                                    cutoff=contact_cutoff,
                                    scheme=args.contact_scheme)

    elif args.metric == 'atompairs':
        if args.atompairs_which is not None:
            pairs = np.loadtxt(args.atompairs_which, int)
        else:
            pairs = None
        metric = AtomPairs(metric=args.atompairs_metric,
                           p=args.atompairs_p,
                           atom_pairs=pairs)

    elif args.metric == 'custom':
        # SECURITY: unpickling executes arbitrary code; only load files from
        # a trusted source. Pickles must be read in binary mode.
        with open(args.picklemetric_input, 'rb') as f:
            metric = pickle.load(f)
        print('#' * 80)
        print('Loaded custom metric:')
        print(metric)
        print('#' * 80)

    else:
        # Apply each plugin constructor to args lazily and take the first
        # truthy result, so constructors after the match are never executed.
        candidates = (plugin(args)
                      for plugin in locate_metric_plugins('construct_metric'))
        metric = next((m for m in candidates if m), None)
        if metric is None:
            # None of the plugins accepted the metric
            raise RuntimeError(
                "Bad metric. Could not be constructed by any built-in or plugin metric. Perhaps you have a poorly written plugin?"
            )

    if not isinstance(metric, AbstractDistanceMetric):
        # Raise (the original returned the exception object to the caller).
        raise ValueError("%s is not a AbstractDistanceMetric" % (metric,))

    return metric
def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
                     max_rmsd, min_gens, center_conformations, memory_check,
                     omp_parallel_rmsd=True):
    """
    Merge the XTC files found in a CLONE directory into one H5 trajectory.

    Depending on ``self.memory`` the frames are either written to a new
    ``trj<trajectory_number>.h5`` file in ``output_dir`` or appended to the
    file recorded for ``clone_dir`` by an earlier conversion. On success
    ``self.memory[clone_dir]`` is set to ``[output_file_path,
    num_xtcs_processed]``. Every skip condition logs a message and returns
    early without touching ``self.memory``.

    Parameters
    ----------
    clone_dir : str
        the directory in which the xtc files are found. All of the xtc files
        in this directory are joined together to make a single trajectory
        (.h5) output file
    output_dir : str
        directory where the outputted files will be placed
    trajectory_number : int
        A unique number for this trajectory. This number is used in
        constructing the filename to write the outputted .h5 trajectory to,
        and thus must be unique
    stride : int
        Subsample by only considering every Nth snapshot.
    max_rmsd : {number, None}
        if this value is not None, calculate the RMSD to the pdb_file from
        each snapshot and reject trajectories which have snapshots with RMSD
        greater than max_rmsd. If None, no check is performed
    min_gens : int
        Trajectories whose number of XTC files to convert is less than or
        equal to `min_gens` are skipped.
    center_conformations : bool
        center conformations before saving.
    memory_check : bool
        if true, uses the memory dictionary to do an update rather than a
        complete re-convert.
    omp_parallel_rmsd : bool
        Passed as ``omp_parallel`` to the RMSD metric used for the max_rmsd
        check

    Returns
    -------
    None
    """

    xtc_files = self.list_xtcs_in_dir(clone_dir)

    # Ensure that we're only joining contiguously numbered xtc files -- starting at 0 --
    # into a trajectory. If there are gaps in the xtc files in the directory, we only
    # want to use the ones such that they are contiguously numbered
    i = 0  # rebound by the loop below
    for i, filename in enumerate(xtc_files):
        if self.integer_component(filename) != i:
            logger.error("Found discontinuity in xtc numbering - check data in %s", clone_dir)
            # keep only the files before the first gap
            xtc_files = xtc_files[0:i]
            break

    # check the memory object to see which xtc files have already been converted, and
    # exclude those from this conversion
    if memory_check:
        if clone_dir in self.memory.keys():
            previous_convert_exists = True
            num_xtcs_converted = self.memory[clone_dir][1]
            if len(xtc_files) == num_xtcs_converted:  # if we have converted everything,
                logger.info("Already converted all files in %s, skipping...", clone_dir)
                return  # just bail out
            else:
                # only the files not covered by the recorded count remain
                xtc_files = xtc_files[num_xtcs_converted:]
        else:
            previous_convert_exists = False
    else:
        previous_convert_exists = False

    xtc_file_paths = [os.path.join(clone_dir, f) for f in xtc_files]

    logger.info("Processing %d xtc files in clone_dir = %s", len(xtc_files), clone_dir)

    # Note the comparison is <=, so exactly min_gens files is also skipped.
    if len(xtc_files) <= min_gens:
        logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
        logger.info("Too few xtc files (generations).")
        return

    try:
        # [this should check for and discard overlapping snapshots]
        trajectory = Trajectory.load_from_xtc(xtc_file_paths, PDBFilename=self.pdb_topology,
                                              discard_overlapping_frames=True)
    except IOError as e:
        logger.error("IOError (%s) when processing trajectory in clone_dir = %s", e, clone_dir)
        logger.error("Attempting rescue by disregarding final frame, which is often")
        logger.error("the first/only frame to be corrupted")

        # with a single file there is nothing left once the last one is dropped
        if len(xtc_file_paths) == 1:
            logger.error("Didn't find any other frames in %s, continuing...", clone_dir)
            return

        try:
            # retry without the last xtc file
            # NOTE(review): this retry does not pass
            # discard_overlapping_frames=True — confirm that is intended.
            trajectory = Trajectory.load_from_xtc(xtc_file_paths[0:-1], PDBFilename=self.pdb_topology)
        except IOError:
            logger.error("Unfortunately, the error remained even after ignoring the final frame.")
            logger.error("Skipping the trajectory in clone_dir = %s", clone_dir)
            return
        else:
            logger.error("Sucessfully recovered from IOError by disregarding final frame.")

    if max_rmsd is not None:
        # AtomID values are shifted down by one to get the indices for RMSD
        atomindices = [int(i)-1 for i in trajectory['AtomID']]
        rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
        ppdb = rmsdmetric.prepare_trajectory(Trajectory.load_trajectory_file(self.pdb_topology))
        ptraj = rmsdmetric.prepare_trajectory(trajectory)
        rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

        if max(rmsds) > max_rmsd:
            # NOTE(review): argmax is not defined in this function; presumably
            # imported at module level — confirm.
            logger.warning("Snapshot %d RMSD %f > the %f cutoff", argmax(rmsds), max(rmsds), max_rmsd)
            logger.warning("Dropping trajectory")
            return

    if center_conformations:
        RMSD.TheoData.centerConformations(trajectory["XYZList"])

    # if we are adding to a previous trajectory, we have to load that traj up and extend it
    if previous_convert_exists:
        output_filename = self.memory[clone_dir][0]
        output_file_path = output_filename
        logger.info("Extending: %s", output_filename)
        assert os.path.exists(output_filename)

        # load the traj and extend it [this should check for and discard overlapping snapshots]
        Trajectory.append_frames_to_file(output_filename,
                                         trajectory['XYZList'][::stride],
                                         discard_overlapping_frames=True)

        # NOTE(review): after the rescue path above the last xtc file was not
        # loaded, yet len(xtc_file_paths) still counts it — confirm intended.
        num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]

    # if we are not adding to a traj, then we create a new one
    else:
        output_filename = 'trj%s.h5' % trajectory_number
        output_file_path = os.path.join(output_dir, output_filename)

        if os.path.exists(output_file_path):
            logger.info("The file name %s already exists. Skipping it.", output_file_path)
            return

        # stride and discard by snapshot
        trajectory['XYZList'] = trajectory['XYZList'][::stride]
        trajectory.save(output_file_path)

        num_xtcs_processed = len(xtc_file_paths)

    # log what we did into the memory object
    self.memory[clone_dir] = [output_file_path, num_xtcs_processed]

    return
def construct_metric(args):
    """Build the distance metric selected by ``args.metric``.

    Parameters
    ----------
    args : namespace
        Parsed command-line options. ``args.metric`` selects the branch
        ('rmsd', 'dihedral', 'contact', 'atompairs', 'positions', 'tica',
        'custom'); any other value is offered to the metric plugins. Each
        branch reads its own additional attributes from ``args``.

    Returns
    -------
    metric : AbstractDistanceMetric

    Raises
    ------
    RuntimeError
        If ``args.metric`` is not built in and no plugin returns a metric.
    ValueError
        If the constructed object is not an AbstractDistanceMetric.
    """
    metric_name = args.metric

    if metric_name == 'rmsd':
        if args.rmsd_atom_indices != 'all':
            # builtin int/float replace the removed np.int/np.float aliases
            atom_indices = np.loadtxt(args.rmsd_atom_indices, int)
        else:
            atom_indices = None
        metric = RMSD(atom_indices)

    elif metric_name == 'dihedral':
        metric = Dihedral(metric=args.dihedral_metric,
                          p=args.dihedral_p,
                          angles=args.dihedral_angles,
                          userfilename=args.dihedral_userfilename)

    elif metric_name == 'contact':
        if args.contact_which != 'all':
            contact_which = np.loadtxt(args.contact_which, int)
        else:
            contact_which = 'all'

        if args.contact_cutoff_file is not None:
            contact_cutoff = np.loadtxt(args.contact_cutoff_file, float)
        elif args.contact_cutoff is not None:
            contact_cutoff = float(args.contact_cutoff)
        else:
            contact_cutoff = None

        # A negative cutoff selects the continuous contact metric.
        if contact_cutoff is not None and contact_cutoff < 0:
            metric = ContinuousContact(contacts=contact_which,
                                       scheme=args.contact_scheme)
        else:
            metric = BooleanContact(contacts=contact_which,
                                    cutoff=contact_cutoff,
                                    scheme=args.contact_scheme)

    elif metric_name == 'atompairs':
        if args.atompairs_which is not None:
            pairs = np.loadtxt(args.atompairs_which, int)
        else:
            pairs = None
        metric = AtomPairs(metric=args.atompairs_metric,
                           p=args.atompairs_p,
                           atom_pairs=pairs)

    elif metric_name == 'positions':
        target = md.load(args.target)

        if args.pos_atom_indices is not None:
            atom_indices = np.loadtxt(args.pos_atom_indices, int)
        else:
            atom_indices = None

        if args.align_indices is not None:
            align_indices = np.loadtxt(args.align_indices, int)
        else:
            align_indices = None

        metric = Positions(target,
                           atom_indices=atom_indices,
                           align_indices=align_indices,
                           metric=args.positions_metric,
                           p=args.positions_p)

    elif metric_name == "tica":
        tica_obj = tICA.load(args.tica_fn)
        metric = RedDimPNorm(tica_obj,
                             num_vecs=args.num_vecs,
                             metric=args.projected_metric,
                             p=args.p)

    elif metric_name == 'custom':
        # SECURITY: unpickling executes arbitrary code; only load files from
        # a trusted source. pickle.load needs a file opened in binary mode.
        with open(args.picklemetric_input, 'rb') as f:
            metric = pickle.load(f)
        print('#' * 80)
        print('Loaded custom metric:')
        print(metric)
        print('#' * 80)

    else:
        # Apply each plugin constructor to args lazily and take the first
        # truthy result. A generator (not a list) is used so constructors
        # after the match are never executed.
        candidates = (plugin(args)
                      for plugin in locate_metric_plugins('construct_metric'))
        metric = next((m for m in candidates if m), None)
        if metric is None:
            # None of the plugins accepted the metric
            raise RuntimeError(
                "Bad metric. Could not be constructed by any built-in or plugin metric. Perhaps you have a poorly written plugin?"
            )

    if not isinstance(metric, AbstractDistanceMetric):
        # Raise (the original returned the exception object to the caller).
        raise ValueError("%s is not a AbstractDistanceMetric" % (metric,))

    return metric