Esempio n. 1
0
 def __init__(self,S,atomindices=None,permuteindices=None):
     """Build the RMSD working data for the coordinates stored in ``S``.

     Parameters
     ----------
     S : mapping-like
         Passed unchanged to the parent constructor; ``S['XYZList']`` is read
         here (assumed to be a (frames, atoms, 3) array -- TODO confirm).
     atomindices : iterable of int, optional
         Indices used to select along axis 1 of ``S['XYZList']``.  When
         ``None`` the full coordinate array is used.
     permuteindices : iterable of iterables, optional
         Groups of indices; they are flattened into a single list here.
     """
     super(LPTraj,self).__init__(S)
     # Use identity tests against None: ``array != None`` is evaluated
     # element-wise by NumPy, which makes the truth test ambiguous (and an
     # error) when a multi-element index array is passed in.
     aidx = list(atomindices) if atomindices is not None else []
     # NOTE(review): pidx is not read in the visible part of this method; it is
     # kept in case the remainder of the method (not shown) uses it.
     pidx = list(itertools.chain(*permuteindices)) if permuteindices is not None else []

     if atomindices is None:
         self.TD = RMSD.TheoData(S['XYZList'])
     else:
         self.TD = RMSD.TheoData(S['XYZList'][:,np.array(aidx)])
Esempio n. 2
0
def run(pdb, traj, atom_indices):
    """Compute RMSD distances between ``pdb`` and the frames of ``traj``.

    Both inputs are prepared with the same ``RMSD(atom_indices)`` metric and
    the result of ``one_to_all(prepared_pdb, prepared_traj, 0)`` is returned
    (index 0 presumably selects the first frame of ``pdb`` as the reference).
    """
    # you could replace this with your own metric if you like
    rmsd_metric = RMSD(atom_indices)

    prepared_reference = rmsd_metric.prepare_trajectory(pdb)
    prepared_frames = rmsd_metric.prepare_trajectory(traj)

    return rmsd_metric.one_to_all(prepared_reference, prepared_frames, 0)
def run(project, pdb, atom_indices):
    """Compute the RMSD from ``pdb`` to every frame of every project trajectory.

    Parameters
    ----------
    project : object
        Must expose ``n_trajs`` and ``load_traj(i)``.
    pdb : trajectory-like
        Reference structure; index 0 is passed to ``one_to_all`` as the
        reference frame.
    atom_indices : array-like or None
        Forwarded to ``RMSD``.

    Returns
    -------
    distances : np.ndarray, shape (n_trajs, longest_trajectory)
        Row ``i`` holds the distances for trajectory ``i``; positions past the
        end of a shorter trajectory are filled with -1.
    """
    rmsd = RMSD(atom_indices)
    ppdb = rmsd.prepare_trajectory(pdb)

    # Collect the per-trajectory distances first so the output width can be
    # taken from the data.  The previous code sized the array by the *number*
    # of trajectories, which fails as soon as a trajectory has more frames
    # than there are trajectories.
    per_traj = []
    for i in range(project.n_trajs):
        ptraj = rmsd.prepare_trajectory(project.load_traj(i))
        per_traj.append(rmsd.one_to_all(ppdb, ptraj, 0))

    longest = max([len(d) for d in per_traj]) if per_traj else 0
    distances = -1 * np.ones((project.n_trajs, longest))
    for i, d in enumerate(per_traj):
        distances[i, 0:len(d)] = d

    return distances
def run(project, pdb, atom_indices):
    """Return an array of RMSDs from ``pdb`` to each frame of each trajectory.

    Parameters
    ----------
    project : object
        Provides ``n_trajs`` and ``load_traj(i)``.
    pdb : trajectory-like
        Reference structure (index 0 is handed to ``one_to_all``).
    atom_indices : array-like or None
        Passed to the ``RMSD`` constructor.

    Returns
    -------
    distances : np.ndarray, shape (n_trajs, max_frames)
        ``distances[i, :len_i]`` are the values for trajectory ``i``; the
        remaining entries of each row stay at the -1 padding value.
    """
    rmsd = RMSD(atom_indices)
    ppdb = rmsd.prepare_trajectory(pdb)

    # The row width must be the longest trajectory, not the trajectory count
    # (the old ``np.max(project.n_trajs)``), otherwise the slice assignment
    # below raises for any trajectory longer than ``n_trajs`` frames.
    rows = []
    for i in range(project.n_trajs):
        ptraj = rmsd.prepare_trajectory(project.load_traj(i))
        rows.append(rmsd.one_to_all(ppdb, ptraj, 0))

    max_frames = max([len(row) for row in rows]) if rows else 0
    distances = -1 * np.ones((project.n_trajs, max_frames))
    for i, row in enumerate(rows):
        distances[i, 0:len(row)] = row

    return distances
Esempio n. 5
0
def test_gpurmsd():
    """Check that GPURMSD and RMSD give the same one-to-all distances.

    The trajectory at ``trj_path`` is prepared with each metric and the
    distances from index 0 are compared to 4 decimal places.
    """
    trajectory = Trajectory.load_trajectory_file(trj_path)

    # GPU side
    gpu_metric = GPURMSD()
    gpu_prepared = gpu_metric.prepare_trajectory(trajectory)
    gpu_metric._gpurmsd.print_params()
    gpu_distances = gpu_metric.one_to_all(gpu_prepared, gpu_prepared, 0)

    # CPU reference
    cpu_metric = RMSD()
    cpu_prepared = cpu_metric.prepare_trajectory(trajectory)
    cpu_distances = cpu_metric.one_to_all(cpu_prepared, cpu_prepared, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
Esempio n. 6
0
def test_gpurmsd():
    """Compare GPU and CPU RMSD distances for the trajectory at ``trj_path``.

    Passes when both metrics agree to 4 decimals on the distances computed
    from index 0 to all frames.
    """
    loaded = Trajectory.load_trajectory_file(trj_path)

    on_gpu = GPURMSD()
    prepared_for_gpu = on_gpu.prepare_trajectory(loaded)
    on_gpu._gpurmsd.print_params()
    gpu_distances = on_gpu.one_to_all(prepared_for_gpu, prepared_for_gpu, 0)

    on_cpu = RMSD()
    prepared_for_cpu = on_cpu.prepare_trajectory(loaded)
    cpu_distances = on_cpu.one_to_all(prepared_for_cpu, prepared_for_cpu, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
Esempio n. 7
0
 def __init__(self):
     """
     Create an empty Voronoi tessellation object.

     Every attribute starts as ``None`` except ``metric``, which is set to
     ``RMSD(None)``.
     """
     # Plain placeholders, to be filled in later.
     self.storage = None
     self._generator = None
     self.atom_indices = None
     self.snapshot_indices = None
     self.snapshot_distances = None

     # Distance metric used for assignments.
     self.metric = RMSD(None)
Esempio n. 8
0
def test_lprmsd():
    """Check LPRMSD against plain RMSD on a fixed subset of atoms.

    Distances from index 0 are computed with LPRMSD (with and without
    ``altindices``) and with RMSD on the same indices; both LPRMSD results
    must match the RMSD result.
    """
    traj = Trajectory.load_trajectory_file('trj0.lh5')
    atom_subset = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    # LPRMSD restricted to the subset
    lp_metric = LPRMSD(atomindices=atom_subset, debug=True)
    lp_prepared = lp_metric.prepare_trajectory(traj)
    lp_dists = lp_metric.one_to_all(lp_prepared, lp_prepared, 0)

    # LPRMSD with the same subset also given as altindices
    lp_alt_metric = LPRMSD(atomindices=atom_subset, altindices=atom_subset,
                           debug=True)
    lp_alt_prepared = lp_alt_metric.prepare_trajectory(traj)
    lp_alt_dists = lp_alt_metric.one_to_all(lp_alt_prepared, lp_alt_prepared, 0)

    # Reference values from the plain RMSD metric
    ref_metric = RMSD(atomindices=atom_subset)
    ref_prepared = ref_metric.prepare_trajectory(traj)
    ref_dists = ref_metric.one_to_all(ref_prepared, ref_prepared, 0)

    npt.assert_array_almost_equal(lp_dists, ref_dists)
    npt.assert_array_almost_equal(lp_alt_dists, ref_dists)
Esempio n. 9
0
def test_lprmsd():
    """Verify that LPRMSD reproduces RMSD distances on a chosen atom set.

    Two LPRMSD configurations (plain, and with ``altindices`` equal to the
    atom set) are compared with the RMSD metric; all distances are taken from
    index 0 of the trajectory loaded from 'trj0.lh5'.
    """
    loaded = Trajectory.load_trajectory_file('trj0.lh5')
    selection = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    plain = LPRMSD(atomindices=selection, debug=True)
    plain_prepared = plain.prepare_trajectory(loaded)
    plain_result = plain.one_to_all(plain_prepared, plain_prepared, 0)

    with_alt = LPRMSD(atomindices=selection, altindices=selection, debug=True)
    with_alt_prepared = with_alt.prepare_trajectory(loaded)
    with_alt_result = with_alt.one_to_all(with_alt_prepared,
                                          with_alt_prepared, 0)

    reference = RMSD(atomindices=selection)
    reference_prepared = reference.prepare_trajectory(loaded)
    expected = reference.one_to_all(reference_prepared, reference_prepared, 0)

    npt.assert_array_almost_equal(plain_result, expected)
    npt.assert_array_almost_equal(with_alt_result, expected)
Esempio n. 10
0
 def __init__(self, structure_or_filename, max_rmsd, atom_indices=None):
     """Create an RMSD validator.

     Builds an ``RMSD(atom_indices)`` metric and hands it, together with the
     structure and the threshold, to the parent constructor.

     Parameters
     ----------
     structure_or_filename : {msmbuilder.Trajectory, str}
         The structure to measure distances to, either as a trajectory (the
         first frame is the only one that counts) or a path to a trajectory
         on disk that can be loaded
     max_rmsd : float
         The threshold rmsd
     atom_indices : np.array [ndim=1, dtype=int]
         The indices over which you want to measure RMSD
     """
     rmsd_metric = RMSD(atom_indices)
     parent = super(RMSDExplosionValidator, self)
     parent.__init__(structure_or_filename, rmsd_metric, max_rmsd)
Esempio n. 11
0
class VoronoiTesselation(object):
    """
    Hold the MSM clustering description and the associated state assignments
    to decide if a trajectory has hit a core.

    Notes
    -----
    ``generators``/``generators_indices`` only exist after
    ``update_cluster_from_storage`` has run (or after they were set from
    outside), and ``n_centers`` is never set by this class: it must be
    assigned by the caller before clustering.
    """

    def __init__(self):
        """
        Create an empty Voronoi tessellation object.

        All attributes start as ``None`` except ``metric``, which is
        ``RMSD(None)``.
        """
        self.storage = None
        self._generator = None
        self.metric = RMSD(None)
        self.atom_indices = None
        self.snapshot_distances = None
        self.snapshot_indices = None

    ################################################################################

    def _nearest_generators(self, traj, n_frames):
        """
        Find, for each of the first ``n_frames`` frames of ``traj``, the
        closest generator according to ``self.metric``.

        Returns
        -------
        assignments : numpy.array(n_frames, dtype='int')
            index of the closest generator for each frame
        distances : numpy.array(n_frames, dtype='float32')
            distance to that generator
        """
        assignments = -1 * np.ones(n_frames, dtype='int')
        distances = -1 * np.ones(n_frames, dtype='float32')

        pgens = self.metric.prepare_trajectory(self.generators)
        ptraj = self.metric.prepare_trajectory(traj)

        for j in range(n_frames):
            d = self.metric.one_to_all(ptraj, pgens, j)
            assignments[j] = np.argmin(d)
            distances[j] = d[assignments[j]]

        return assignments, distances

    def update_cluster_from_storage(self):
        """
        Update the set of generators from the associated trajectory storage.

        Notes
        -----
        Runs ``clustering.HybridKMedoids`` with ``self.n_centers`` clusters on
        all snapshot coordinates of the storage and stores the resulting
        generator indices and generator frames on the instance.
        """
        traj = self.storage.all_snapshot_coordinates_as_mdtraj(self.atom_indices)

        clusterer = clustering.HybridKMedoids(
            self.metric,
            trajectories=traj,
            prep_trajectories=None,
            k=self.n_centers,
            distance_cutoff=None,
            local_num_iters=50,
            global_num_iters=0,
            too_close_cutoff=0.0001,
            ignore_max_objective=False
        )

        gen_inds = clusterer.get_generator_indices()

        self.generators_indices = gen_inds
        self.generators = traj[gen_inds]

    @property
    def size(self):
        """
        Return the number of generators used in the tessellation.

        Returns
        -------
        length : int
            number of generators
        """
        return len(self.generators)

    def assign_storage(self):
        """
        Assign all snapshots in the associated trajectory storage to the
        generators.

        Notes
        -----
        The results are cached in ``snapshot_indices`` and
        ``snapshot_distances`` so that later lookups are fast.
        """
        traj = self.storage.all_snapshot_coordinates_as_mdtraj(self.atom_indices)

        assignments, distances = self._nearest_generators(traj, len(traj))

        self.snapshot_indices = np.array(assignments, dtype='int')
        self.snapshot_distances = np.array(distances, dtype='float')

    def assign_all_trajectories(self):
        """
        Assign all trajectories in the associated trajectory storage to the
        generators.

        Returns
        -------
        clusterlist : list
            one entry of cached cluster IDs per element returned by
            ``storage.all_trajectory_indices()``

        Notes
        -----
        This needs assign_storage() to be run before!
        """
        return [self.snapshot_indices[t] for t in self.storage.all_trajectory_indices()]

    def assign_index_trajectory(self, indices):
        """
        Assign snapshots with IDs ``indices`` to the generators.

        Returns
        -------
        clusterlist : array of int or None
            cached cluster IDs for ``indices``, or ``None`` when no cached
            assignments exist
        """
        if self.snapshot_indices is not None:
            return self.snapshot_indices[indices]
        else:
            return None

    def assign_snapshot(self, snapshot):
        """
        Assign a single snapshot to the cluster centers.

        Returns
        -------
        assignment : int
            cluster ID
        distance : float
            distance to the cluster generator in measure of the metric (RMSD)
        """
        assignments, distances = self.assign(Trajectory([snapshot]))
        return assignments[0], distances[0]

    def assign(self, traj, recalc = False):
        """
        Assign a Trajectory object to the cluster.

        Parameters
        ----------
        traj : Trajectory
            trajectory to be clustered
        recalc : bool
            force a fresh calculation instead of using the cached
            assignments (Default False)

        Returns
        -------
        assignments : numpy.array(n_frames, dtype='int')
            array of cluster IDs
        distances : numpy.array(n_frames)
            distances to the cluster generator in measure of the metric (RMSD)
        """
        if self.snapshot_indices is not None and not recalc:
            # We do not check if the snapshot_indices are properly updated!
            # Checking might be too expensive
            indices = traj.indices()
            return self.snapshot_indices[indices], self.snapshot_distances[indices]

        # No cache available, or the caller explicitly asked to recompute.
        return self._nearest_generators(
            traj.subset(self.atom_indices).md(), len(traj))
Esempio n. 12
0
    def write_trajectory(self,
                         clone_dir,
                         output_dir,
                         trajectory_number,
                         stride,
                         max_rmsd,
                         min_gens,
                         center_conformations,
                         memory_check,
                         omp_parallel_rmsd=True):
        """
        Take a path to a CLONE and merge all the XTC files found there into
        an LH5 trajectory.

        Parameters
        ----------
        clone_dir : str
            the directory in which the xtc files are found. All of the xtc
            files in this directory are joined together to make a single
            trajectory (.lh5) output file

        output_dir : str
            directory where the output files will be placed

        trajectory_number : int
            A unique number for this trajectory. This number is used in
            constructing the filename of the output .lh5 trajectory, and
            thus must be unique

        stride : int
            Subsample by only considering every Nth snapshot.

        max_rmsd : {int, None}
            if this value is not None, calculate the RMSD to the pdb_file
            from each snapshot and reject trajectories which have snapshots
            with RMSD greater than max_rmsd. If None, no check is performed

        min_gens : int
            The trajectory is skipped when the number of xtc files to
            process is less than or equal to `min_gens`.

        center_conformations : bool
            center conformations before saving.

        memory_check : bool
            if true, use the memory dictionary to do an update rather than a
            complete re-convert.

        omp_parallel_rmsd : bool
            If true, use OpenMP accelerated RMSD calculation for the
            max_rmsd check
        """
        xtc_files = self.list_xtcs_in_dir(clone_dir)

        # Only join contiguously numbered xtc files, starting at 0. At the
        # first gap, everything from that position onwards is dropped.
        for position, xtc_name in enumerate(xtc_files):
            if self.integer_component(xtc_name) != position:
                logger.error(
                    "Found discontinuity in xtc numbering - check data in %s",
                    clone_dir)
                xtc_files = xtc_files[:position]
                break

        # Consult the memory object: files that were already converted are
        # excluded from this conversion.
        previous_convert_exists = False
        if memory_check and clone_dir in self.memory.keys():
            previous_convert_exists = True
            already_converted = self.memory[clone_dir][1]
            if len(xtc_files) == already_converted:
                # everything has been converted before: just bail out
                logger.info("Already converted all files in %s, skipping...",
                            clone_dir)
                return
            xtc_files = xtc_files[already_converted:]

        xtc_file_paths = [os.path.join(clone_dir, name) for name in xtc_files]

        logger.info("Processing %d xtc files in clone_dir = %s",
                    len(xtc_files), clone_dir)

        if len(xtc_files) <= min_gens:
            logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
            logger.info("Too few xtc files (generations).")
            return

        try:
            # [this should check for and discard overlapping snapshots]
            trajectory = Trajectory.load_from_xtc(
                xtc_file_paths,
                PDBFilename=self.pdb_topology,
                discard_overlapping_frames=True)
        except IOError as load_error:
            logger.error(
                "IOError (%s) when processing trajectory in clone_dir = %s",
                load_error, clone_dir)
            logger.error(
                "Attempting rescue by disregarding final frame, which is often"
            )
            logger.error("the first/only frame to be corrupted")

            if len(xtc_file_paths) == 1:
                logger.error(
                    "Didn't find any other frames in %s, continuing...",
                    clone_dir)
                return

            # Second attempt: same load without the last file.
            try:
                trajectory = Trajectory.load_from_xtc(
                    xtc_file_paths[:-1], PDBFilename=self.pdb_topology)
            except IOError:
                logger.error(
                    "Unfortunately, the error remained even after ignoring the final frame."
                )
                logger.error("Skipping the trajectory in clone_dir = %s",
                             clone_dir)
                return

            logger.error(
                "Sucessfully recovered from IOError by disregarding final frame."
            )

        if max_rmsd is not None:
            # AtomID values are shifted down by one to obtain the indices
            zero_based_atoms = [int(atom_id) - 1
                                for atom_id in trajectory['AtomID']]
            rmsd_metric = RMSD(zero_based_atoms,
                               omp_parallel=omp_parallel_rmsd)
            reference = Trajectory.load_trajectory_file(self.pdb_topology)
            prepared_reference = rmsd_metric.prepare_trajectory(reference)
            prepared_traj = rmsd_metric.prepare_trajectory(trajectory)
            rmsds = rmsd_metric.one_to_all(prepared_reference, prepared_traj, 0)

            worst_rmsd = max(rmsds)
            if worst_rmsd > max_rmsd:
                logger.warning("Snapshot %d RMSD %f > the %f cutoff",
                               argmax(rmsds), worst_rmsd, max_rmsd)
                logger.warning("Dropping trajectory")
                return

        if center_conformations:
            RMSD.TheoData.centerConformations(trajectory["XYZList"])

        if previous_convert_exists:
            # Adding to a previous trajectory: append the new frames to the
            # file recorded in the memory object.
            output_file_path = self.memory[clone_dir][0]
            logger.info("Extending: %s", output_file_path)
            assert os.path.exists(output_file_path)

            # [this should check for and discard overlapping snapshots]
            Trajectory.append_frames_to_file(output_file_path,
                                             trajectory['XYZList'][::stride],
                                             discard_overlapping_frames=True)

            num_xtcs_processed = (len(xtc_file_paths)
                                  + self.memory[clone_dir][1])
        else:
            # Not adding to an existing trajectory: create a new file.
            output_file_path = os.path.join(
                output_dir, 'trj%s.lh5' % trajectory_number)

            if os.path.exists(output_file_path):
                logger.info("The file name %s already exists. Skipping it.",
                            output_file_path)
                return

            # stride and discard by snapshot
            trajectory['XYZList'] = trajectory['XYZList'][::stride]
            trajectory.save(output_file_path)

            num_xtcs_processed = len(xtc_file_paths)

        # log what we did into the memory object
        self.memory[clone_dir] = [output_file_path, num_xtcs_processed]
Esempio n. 13
0
        target = c_xyzlist[which, :, :]
        progressive = False

    for i in range(1, len(xyzlist)):
        if progressive:
            target = c_xyzlist[i - i]
        rmsd, operator = kabsch(c_xyzlist[i], target, operator=True)
        c_xyzlist[i] = operator(c_xyzlist[i])

    return c_xyzlist


if __name__ == '__main__':
    # Some test code: align two random point clouds with kabsch and print
    # the result next to msmbuilder's RMSD and a direct recomputation.
    # NOTE(review): kabsch is called without operator=True here but its
    # result is unpacked into two values -- confirm against its signature.

    N = 40
    scale = np.arange(N)[:, np.newaxis]
    query = scale * np.random.randn(N, 3)
    target = scale * np.random.randn(N, 3)

    dist, op = kabsch(query, target)
    print('my rmsd        ', dist)

    from msmbuilder.metrics import RMSD

    _rmsdcalc = RMSD()
    # each structure is wrapped as a one-frame coordinate array
    t0 = RMSD.TheoData(query[np.newaxis, :, :])
    t1 = RMSD.TheoData(target[np.newaxis, :, :])
    msmb_rmsd = _rmsdcalc.one_to_all(t0, t1, 0)[0]
    print('msmbuilder rmsd', msmb_rmsd)

    # root of the summed squared residuals, divided by the number of points
    residual = target - op(query)
    print(np.sqrt(np.sum(np.square(residual)) / N))
Esempio n. 14
0
 def cpudist(t):
     """Return the RMSD one-to-all distances from index 0 of ``t`` to ``t``."""
     cpu_metric = RMSD()
     prepared = cpu_metric.prepare_trajectory(t)
     return cpu_metric.one_to_all(prepared, prepared, 0)
Esempio n. 15
0
    else:
        target = c_xyzlist[which, :, :]
        progressive = False

    for i in range(1, len(xyzlist)):
        if progressive:
            target = c_xyzlist[i-i]
        rmsd, operator = kabsch(c_xyzlist[i], target, operator=True)
        c_xyzlist[i] = operator(c_xyzlist[i])

    return c_xyzlist


if __name__ == '__main__':
    # Some test code: align two random point clouds with kabsch and print
    # the result next to msmbuilder's RMSD and a direct recomputation.
    # NOTE(review): kabsch is called without operator=True here but its
    # result is unpacked into two values -- confirm against its signature.

    N = 40
    query = np.arange(N)[:, np.newaxis] * np.random.randn(N, 3)
    target = np.arange(N)[:, np.newaxis] * np.random.randn(N, 3)

    dist, op = kabsch(query, target)
    # Single-argument print(...) calls give the same output under Python 2
    # (print statement) and Python 3 (print function); the old
    # ``print a, b`` statements were a SyntaxError on Python 3.
    print('%s %s' % ('my rmsd        ', dist))

    from msmbuilder.metrics import RMSD
    _rmsdcalc = RMSD()
    # each structure is wrapped as a one-frame coordinate array
    t0 = RMSD.TheoData(query[np.newaxis, :, :])
    t1 = RMSD.TheoData(target[np.newaxis, :, :])
    print('%s %s' % ('msmbuilder rmsd', _rmsdcalc.one_to_all(t0, t1, 0)[0]))

    print(np.sqrt(np.sum(np.square(target - op(query))) / N))
Esempio n. 16
0
def construct_metric(args):
    """Build the distance metric selected by ``args.metric``.

    Parameters
    ----------
    args : namespace-like
        Parsed command line options.  ``args.metric`` selects the branch
        ('rmsd', 'dihedral', 'contact', 'atompairs', 'custom'); any other
        value is offered to the 'construct_metric' plugins.  Each branch
        reads its own additional attributes from ``args``.

    Returns
    -------
    metric : AbstractDistanceMetric

    Raises
    ------
    RuntimeError
        If no built-in branch matches and no plugin returns a truthy metric.
    ValueError
        If the constructed object is not an AbstractDistanceMetric.
    """
    if args.metric == 'rmsd':
        if args.rmsd_atom_indices != 'all':
            # builtin int: the np.int alias was removed from NumPy
            atom_indices = np.loadtxt(args.rmsd_atom_indices, int)
        else:
            atom_indices = None
        metric = RMSD(atom_indices)

    elif args.metric == 'dihedral':
        metric = Dihedral(metric=args.dihedral_metric,
                          p=args.dihedral_p,
                          angles=args.dihedral_angles)

    elif args.metric == 'contact':
        if args.contact_which != 'all':
            contact_which = np.loadtxt(args.contact_which, int)
        else:
            contact_which = 'all'

        if args.contact_cutoff_file is not None:
            contact_cutoff = np.loadtxt(args.contact_cutoff_file, float)
        elif args.contact_cutoff is not None:
            contact_cutoff = float(args.contact_cutoff)
        else:
            contact_cutoff = None

        # A negative cutoff selects the continuous contact metric.
        # NOTE(review): a multi-valued cutoff loaded from file makes the
        # ``< 0`` test ambiguous -- confirm the intended handling.
        if contact_cutoff is not None and contact_cutoff < 0:
            metric = ContinuousContact(contacts=contact_which,
                                       scheme=args.contact_scheme)
        else:
            metric = BooleanContact(contacts=contact_which,
                                    cutoff=contact_cutoff,
                                    scheme=args.contact_scheme)

    elif args.metric == 'atompairs':
        if args.atompairs_which is not None:
            pairs = np.loadtxt(args.atompairs_which, int)
        else:
            pairs = None

        metric = AtomPairs(metric=args.atompairs_metric,
                           p=args.atompairs_p,
                           atom_pairs=pairs)

    elif args.metric == 'custom':
        # SECURITY: unpickling executes arbitrary code; only load metric
        # files from a trusted source.  Pickles must be read in binary mode.
        with open(args.picklemetric_input, 'rb') as f:
            metric = pickle.load(f)
        print('#' * 80)
        print('Loaded custom metric:')
        print(metric)
        print('#' * 80)
    else:
        # Apply each plugin constructor to args and take the first truthy
        # result; constructors after the first match are never called.
        for plugin in locate_metric_plugins('construct_metric'):
            metric = plugin(args)
            if metric:
                break
        else:
            # This means that none of the plugins accepted the metric
            raise RuntimeError(
                "Bad metric. Could not be constructed by any built-in or plugin metric. Perhaps you have a poorly written plugin?"
            )

    if not isinstance(metric, AbstractDistanceMetric):
        # raise (not return) so callers never receive an exception object
        # in place of a metric
        raise ValueError("%s is not a AbstractDistanceMetric" % (metric,))

    return metric
Esempio n. 17
0
 def cpudist(t):
     """Compute RMSD distances from index 0 of ``t`` to every frame of ``t``."""
     metric = RMSD()
     prepared_t = metric.prepare_trajectory(t)
     distances = metric.one_to_all(prepared_t, prepared_t, 0)
     return distances
Esempio n. 18
0
    def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
                         max_rmsd, min_gens, center_conformations, memory_check,
                         omp_parallel_rmsd=True):
        """
        Take a path to a CLONE and merge all the XTC files found there into
        an H5 trajectory.

        Parameters
        ----------
        clone_dir : str
            the directory in which the xtc files are found. All of the xtc
            files in this directory are joined together to make a single
            trajectory (.h5) output file

        output_dir : str
            directory where the output files will be placed

        trajectory_number : int
            A unique number for this trajectory. This number is used in
            constructing the filename of the output .h5 trajectory, and thus
            must be unique

        stride : int
            Subsample by only considering every Nth snapshot.

        max_rmsd : {int, None}
            if this value is not None, calculate the RMSD to the pdb_file
            from each snapshot and reject trajectories which have snapshots
            with RMSD greater than max_rmsd. If None, no check is performed

        min_gens : int
            The trajectory is skipped when the number of xtc files to
            process is less than or equal to `min_gens`.

        center_conformations : bool
            center conformations before saving.

        memory_check : bool
            if true, use the memory dictionary to do an update rather than a
            complete re-convert.

        omp_parallel_rmsd : bool
            If true, use OpenMP accelerated RMSD calculation for the
            max_rmsd check
        """
        xtc_files = self.list_xtcs_in_dir(clone_dir)

        # Keep only the leading run of contiguously numbered xtc files
        # (numbering starts at 0); the first gap truncates the list.
        for expected, fname in enumerate(xtc_files):
            if self.integer_component(fname) != expected:
                logger.error("Found discontinuity in xtc numbering - check data in %s", clone_dir)
                xtc_files = xtc_files[:expected]
                break

        # Skip the files the memory object lists as already converted.
        previous_convert_exists = False
        if memory_check and clone_dir in self.memory.keys():
            previous_convert_exists = True
            n_done = self.memory[clone_dir][1]
            if len(xtc_files) == n_done:
                # nothing new to convert: just bail out
                logger.info("Already converted all files in %s, skipping...", clone_dir)
                return
            xtc_files = xtc_files[n_done:]

        xtc_file_paths = [os.path.join(clone_dir, fname) for fname in xtc_files]

        logger.info("Processing %d xtc files in clone_dir = %s", len(xtc_files), clone_dir)

        if len(xtc_files) <= min_gens:
            logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
            logger.info("Too few xtc files (generations).")
            return

        try:
            # [this should check for and discard overlapping snapshots]
            trajectory = Trajectory.load_from_xtc(
                xtc_file_paths,
                PDBFilename=self.pdb_topology,
                discard_overlapping_frames=True)
        except IOError as first_error:
            logger.error("IOError (%s) when processing trajectory in clone_dir = %s", first_error, clone_dir)
            logger.error("Attempting rescue by disregarding final frame, which is often")
            logger.error("the first/only frame to be corrupted")

            if len(xtc_file_paths) == 1:
                logger.error("Didn't find any other frames in %s, continuing...", clone_dir)
                return

            # Retry the load with the last file left out.
            try:
                trajectory = Trajectory.load_from_xtc(
                    xtc_file_paths[:-1], PDBFilename=self.pdb_topology)
            except IOError:
                logger.error("Unfortunately, the error remained even after ignoring the final frame.")
                logger.error("Skipping the trajectory in clone_dir = %s", clone_dir)
                return

            logger.error("Sucessfully recovered from IOError by disregarding final frame.")

        if max_rmsd is not None:
            # AtomID values are shifted down by one to obtain the indices
            atom_ids = [int(raw_id) - 1 for raw_id in trajectory['AtomID']]
            checker = RMSD(atom_ids, omp_parallel=omp_parallel_rmsd)
            topology_traj = Trajectory.load_trajectory_file(self.pdb_topology)
            prepared_pdb = checker.prepare_trajectory(topology_traj)
            prepared_new = checker.prepare_trajectory(trajectory)
            rmsds = checker.one_to_all(prepared_pdb, prepared_new, 0)

            largest = max(rmsds)
            if largest > max_rmsd:
                logger.warning("Snapshot %d RMSD %f > the %f cutoff" , argmax(rmsds), largest, max_rmsd)
                logger.warning("Dropping trajectory")
                return

        if center_conformations:
            RMSD.TheoData.centerConformations(trajectory["XYZList"])

        if previous_convert_exists:
            # Extend the trajectory file recorded in the memory object.
            output_file_path = self.memory[clone_dir][0]
            logger.info("Extending: %s", output_file_path)
            assert os.path.exists(output_file_path)

            # [this should check for and discard overlapping snapshots]
            Trajectory.append_frames_to_file(
                output_file_path,
                trajectory['XYZList'][::stride],
                discard_overlapping_frames=True)

            num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]
        else:
            # No earlier conversion: write a brand-new trajectory file.
            output_file_path = os.path.join(output_dir, 'trj%s.h5' % trajectory_number)

            if os.path.exists(output_file_path):
                logger.info("The file name %s already exists. Skipping it.", output_file_path)
                return

            # stride and discard by snapshot
            trajectory['XYZList'] = trajectory['XYZList'][::stride]
            trajectory.save(output_file_path)

            num_xtcs_processed = len(xtc_file_paths)

        # log what we did into the memory object
        self.memory[clone_dir] = [output_file_path, num_xtcs_processed]
Esempio n. 19
0
def construct_metric(args):
    """Build the distance metric selected by ``args.metric``.

    Parameters
    ----------
    args : namespace-like
        Parsed command line options.  ``args.metric`` selects the branch
        ('rmsd', 'dihedral', 'contact', 'atompairs', 'positions', 'tica',
        'custom'); any other value is offered to the 'construct_metric'
        plugins.  Each branch reads its own additional attributes from
        ``args``.

    Returns
    -------
    metric : AbstractDistanceMetric

    Raises
    ------
    RuntimeError
        If no built-in branch matches and no plugin returns a truthy metric.
    ValueError
        If the constructed object is not an AbstractDistanceMetric.
    """
    metric_name = args.metric

    if metric_name == 'rmsd':
        if args.rmsd_atom_indices != 'all':
            # builtin int: the np.int alias was removed from NumPy
            atom_indices = np.loadtxt(args.rmsd_atom_indices, int)
        else:
            atom_indices = None
        metric = RMSD(atom_indices)

    elif metric_name == 'dihedral':
        metric = Dihedral(metric=args.dihedral_metric,
                          p=args.dihedral_p,
                          angles=args.dihedral_angles,
                          userfilename=args.dihedral_userfilename)

    elif metric_name == 'contact':
        if args.contact_which != 'all':
            contact_which = np.loadtxt(args.contact_which, int)
        else:
            contact_which = 'all'

        if args.contact_cutoff_file is not None:
            contact_cutoff = np.loadtxt(args.contact_cutoff_file, float)
        elif args.contact_cutoff is not None:
            contact_cutoff = float(args.contact_cutoff)
        else:
            contact_cutoff = None

        # A negative cutoff selects the continuous contact metric.
        # NOTE(review): a multi-valued cutoff loaded from file makes the
        # ``< 0`` test ambiguous -- confirm the intended handling.
        if contact_cutoff is not None and contact_cutoff < 0:
            metric = ContinuousContact(contacts=contact_which,
                                       scheme=args.contact_scheme)
        else:
            metric = BooleanContact(contacts=contact_which,
                                    cutoff=contact_cutoff,
                                    scheme=args.contact_scheme)

    elif metric_name == 'atompairs':
        if args.atompairs_which is not None:
            pairs = np.loadtxt(args.atompairs_which, int)
        else:
            pairs = None

        metric = AtomPairs(metric=args.atompairs_metric,
                           p=args.atompairs_p,
                           atom_pairs=pairs)

    elif metric_name == 'positions':
        target = md.load(args.target)

        if args.pos_atom_indices is not None:
            atom_indices = np.loadtxt(args.pos_atom_indices, int)
        else:
            atom_indices = None

        if args.align_indices is not None:
            align_indices = np.loadtxt(args.align_indices, int)
        else:
            align_indices = None

        metric = Positions(target,
                           atom_indices=atom_indices,
                           align_indices=align_indices,
                           metric=args.positions_metric,
                           p=args.positions_p)

    elif metric_name == "tica":
        tica_obj = tICA.load(args.tica_fn)

        metric = RedDimPNorm(tica_obj,
                             num_vecs=args.num_vecs,
                             metric=args.projected_metric,
                             p=args.p)

    elif metric_name == 'custom':
        # SECURITY: unpickling executes arbitrary code; only load metric
        # files from a trusted source.  pickle.load needs a binary file
        # object, so the file is opened with 'rb'.
        with open(args.picklemetric_input, 'rb') as f:
            metric = pickle.load(f)
        print('#' * 80)
        print('Loaded custom metric:')
        print(metric)
        print('#' * 80)
    else:
        # Apply each plugin constructor to args and take the first truthy
        # result; constructors after the first match are never called.
        for plugin in locate_metric_plugins('construct_metric'):
            metric = plugin(args)
            if metric:
                break
        else:
            # This means that none of the plugins accepted the metric
            raise RuntimeError(
                "Bad metric. Could not be constructed by any built-in or plugin metric. Perhaps you have a poorly written plugin?"
            )

    if not isinstance(metric, AbstractDistanceMetric):
        # raise (not return) so callers never receive an exception object
        # in place of a metric
        raise ValueError("%s is not a AbstractDistanceMetric" % (metric,))

    return metric