Example #1
def run(self):
    # load trajectories and concatenate their coordinates into one array
    if self.mem_efficient:
        # slice to the atom subset at load time to reduce the memory footprint
        trj_lengths, xyzs = load_as_concatenated(
            filenames=self.trj_filenames,
            processes=self.n_procs,
            top=self.base_struct_md,
            atom_indices=self.atom_indices_vals)
        trjs_sub = md.Trajectory(
            xyzs,
            self.base_struct_md.atom_slice(
                self.atom_indices_vals).topology)
    else:
        trj_lengths, xyzs = load_as_concatenated(
            filenames=self.trj_filenames,
            processes=self.n_procs,
            top=self.base_struct_md)
        trjs = md.Trajectory(xyzs, self.base_struct_md.topology)
        trjs_sub = trjs.atom_slice(self.atom_indices_vals)
    # determine whether to rebuild all the MSM data from scratch
    if self.build_full:
        base_struct_centers = self.base_struct_md.atom_slice(
            self.atom_indices_vals)
        base_struct_centers.save_pdb("./centers.pdb")
        self.base_struct_md.save_pdb("./prot_masses.pdb")
        init_centers = None
    else:
        init_centers = md.load("./data/centers.xtc", top="./centers.pdb")
    # fit the data with the base clustering object
    self.base_clust_obj.fit(trjs_sub, init_centers=init_centers)
    center_indices, distances, assignments, centers = \
        self.base_clust_obj.result_.partition(trj_lengths)
    # save per-frame assignments and center distances as ragged arrays
    ra.save("./data/assignments.h5", assignments)
    ra.save("./data/distances.h5", distances)
    trjs_sub = trjs_sub[self.base_clust_obj.center_indices_]
    trjs_sub.superpose(trjs_sub[0])
    trjs_sub.save_xtc("./data/centers.xtc")
    if not self.mem_efficient:
        full_centers = trjs[self.base_clust_obj.center_indices_]
        full_centers.superpose(self.base_struct_md)
        full_centers.save_xtc("./data/full_centers.xtc")
    # record the states added since the previous round of clustering
    n_states = len(self.base_clust_obj.center_indices_)
    unique_states = np.arange(n_states)
    if init_centers is not None:
        unique_states = unique_states[-(n_states - len(init_centers)):]
    np.save("./data/unique_states.npy", unique_states)
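All of these examples share the same core pattern: load_as_concatenated returns the per-file frame counts plus one concatenated coordinate array, which is then rewrapped as a single md.Trajectory. A minimal sketch of that pattern, with hypothetical filenames and topology path:

import mdtraj as md
from enspara.util.load import load_as_concatenated

top = md.load("prot_masses.pdb")  # hypothetical topology file
lengths, xyz = load_as_concatenated(
    filenames=["trj0.xtc", "trj1.xtc"],  # hypothetical trajectory files
    top=top,
    processes=2)
# xyz has shape (sum(lengths), n_atoms, 3); rewrap it as one Trajectory
trjs = md.Trajectory(xyz, top.topology)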
Example #2
def run(self, msm_dir='.'):
    if self.centers != 'none':
        assignments = ra.load(msm_dir + "/data/assignments.h5")
        distances = ra.load(msm_dir + "/data/distances.h5")
        # choose which cluster centers to write out as structures
        if self.centers == 'auto':
            # only the states discovered since the previous round
            state_nums = np.load(msm_dir + "/data/unique_states.npy")
        elif self.centers == 'all':
            state_nums = None
        elif self.centers == 'restarts':
            # the states previously ranked for simulation restarts
            states_to_simulate_file = \
                msm_dir + "/rankings/states_to_simulate_gen" + \
                str(self.gen_num) + ".npy"
            state_nums = np.load(states_to_simulate_file)
        save_states(assignments,
                    distances,
                    state_nums=state_nums,
                    n_procs=self.n_procs,
                    largest_center=self.largest_center,
                    save_routine=self.save_routine,
                    msm_dir=msm_dir)
    if self.save_xtc_centers:
        # merge the per-state center PDBs into a single xtc file
        center_filenames = np.sort(
            glob.glob("%s/centers_masses/*.pdb" % msm_dir))
        trj_lengths, xyzs = load_as_concatenated(center_filenames,
                                                 processes=self.n_procs)
        centers = md.Trajectory(xyzs,
                                topology=md.load("%s/prot_masses.pdb" %
                                                 msm_dir).top)
        centers.save_xtc("%s/data/full_centers.xtc" % msm_dir)
Example #3
def write_struct_ctrs(trajectoryfiles,
                      topology,
                      ctr_inds_file,
                      ctr_structs_file,
                      run_after=None):

    import os
    import pickle
    import mdtraj as md
    from enspara.util.load import load_as_concatenated

    print("Loading center indices at", ctr_inds_file)

    if os.path.isfile(ctr_structs_file):
        print("Overwriting", ctr_structs_file)
        # alternative behavior, currently disabled: refuse to overwrite
        # print("Refusing to overwrite", ctr_structs_file)
        # return ctr_structs_file

    try:
        with open(ctr_inds_file, 'rb') as f:
            ctr_inds = pickle.load(f)
    except pickle.UnpicklingError:
        import numpy as np
        ctr_inds = np.load(ctr_inds_file)

    print("Loaded %s center indices." % len(ctr_inds))
    top = md.load(topology).top

    try:
        lengths, xyz = load_as_concatenated(
            filenames=[trajectoryfiles[tr] for tr, fr in ctr_inds],
            args=[{
                'frame': fr,
                'top': top
            } for tr, fr in ctr_inds],
            processes=4)
    except IndexError:
        print(len(trajectoryfiles), len(ctr_inds),
              max([tr for tr, fr in ctr_inds]))
        raise

    ctr_structs = md.Trajectory(xyz=xyz, topology=top)
    ctr_structs.save(ctr_structs_file)

    return ctr_structs_file
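Example #3 relies on the per-file args list of load_as_concatenated: each dict is forwarded as keyword arguments to the loader for the matching filename, here pulling a single frame from each trajectory. A minimal sketch of that pattern, with hypothetical filenames and frame numbers:

import mdtraj as md
from enspara.util.load import load_as_concatenated

top = md.load("prot_masses.pdb").top  # hypothetical topology file
lengths, xyz = load_as_concatenated(
    filenames=["trj0.xtc", "trj1.xtc"],  # hypothetical files
    args=[{'frame': 10, 'top': top},     # frame 10 of trj0.xtc
          {'frame': 42, 'top': top}],    # frame 42 of trj1.xtc
    processes=2)
# one frame per file, so every entry of lengths is 1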
Example #4
def batch_reassign(targets, centers, lengths, frac_mem, n_procs=None):

    example_center = centers[0]

    DTYPE_BYTES = 4
    batch_size, batch_gb = determine_batch_size(
        example_center.n_atoms, DTYPE_BYTES, frac_mem)

    logger.info(
        'Batch max size set to %s frames (~%.2f GB, %.1f%% of total RAM).' %
        (batch_size, batch_gb, frac_mem*100))

    if batch_size < max(lengths):
        raise enspara.exception.ImproperlyConfigured(
            'Batch size of %s was smaller than largest file (size %s).' %
            (batch_size, max(lengths)))

    batches = compute_batches(lengths, batch_size)

    assignments = []
    distances = []

    for i, batch_indices in enumerate(batches):
        tick = time.perf_counter()
        logger.info("Starting batch %s of %s", i+1, len(batches))
        batch_targets = [targets[j] for j in batch_indices]

        with timed("Loaded frames for batch in %.1f seconds", logger.info):
            batch_lengths, xyz = load_as_concatenated(
                [tfile for tfile, top, aids in batch_targets],
                lengths=[lengths[j] for j in batch_indices],
                args=[{'top': top, 'atom_indices': aids}
                      for tfile, top, aids in batch_targets],
                processes=n_procs)

        # mdtraj loads coordinates as float32, so load_as_concatenated
        # should also return float32. This assertion should never fail,
        # barring some platform-specific situation where double != float64.
        assert xyz.dtype.itemsize == DTYPE_BYTES

        trj = md.Trajectory(xyz, topology=example_center.top)

        with timed("Precentered trajectories in %.1f seconds", logger.debug):
            trj.center_coordinates()

        with timed("Assigned trajectories in %.1f seconds", logger.debug):
            batch_assignments, batch_distances = assign_to_nearest_center(
                    trj, centers, partial(md.rmsd, precentered=True))

        # clear memory of xyz and trj to allow cleanup to deallocate
        # these large arrays; may help with memory high-water mark
        with timed("Cleared array from memory in %.1f seconds", logger.debug):
            xyz_size = xyz.size
            del trj, xyz

        assignments.extend(partition_list(batch_assignments, batch_lengths))
        distances.extend(partition_list(batch_distances, batch_lengths))

        logger.info(
            "Finished batch %s of %s in %.1f seconds. Coordinates array had "
            "memory footprint of %.2f GB (of memory high-water mark %.2f/%.2f "
            "GB).",
            i+1, len(batches), time.perf_counter() - tick,
            xyz_size * DTYPE_BYTES / 1024**3,
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024**2,
            psutil.virtual_memory().total / 1024**3)

    return assignments, distances
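compute_batches and determine_batch_size are helpers not shown above. For context, here is one plausible sketch of compute_batches, under the assumption that it greedily packs whole trajectories into batches whose total frame count stays at or below batch_size; the real enspara helper may differ:

def compute_batches(lengths, batch_size):
    # hypothetical sketch, not necessarily enspara's implementation:
    # group trajectory indices so each batch's total frame count
    # stays at or below batch_size
    batches, current, current_len = [], [], 0
    for i, length in enumerate(lengths):
        if current and current_len + length > batch_size:
            batches.append(current)
            current, current_len = [], 0
        current.append(i)
        current_len += length
    if current:
        batches.append(current)
    return batches

This also explains the earlier guard: a single file longer than batch_size could never fit in any batch, hence the ImproperlyConfigured error.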
Example #5
import glob
import os
from functools import partial
from multiprocessing import Pool

import mdtraj as md
import numpy as np

from fast.msm_gen import save_states as ss
from enspara import cluster
from enspara.msm import MSM, builders
from enspara.util.load import load_as_concatenated
from enspara.util import array as ra

dist_cutoff = 0.01
assignments = ra.load("./data/assignments.h5")
distances = ra.load("./data/distances.h5")
ss.save_states(assignments,
               distances,
               save_routine='masses',
               largest_center=dist_cutoff,
               n_confs=1,
               n_procs=64)
print("Saving the states!")

prot_masses = "./prot_masses.pdb"
prot_masses = md.load(prot_masses)
pdb_names = np.sort(glob.glob("./centers_masses/*.pdb"))
trj_lengths, xyzs = load_as_concatenated(pdb_names,
                                         processes=64,
                                         top=prot_masses)
centers_full = md.Trajectory(xyz=xyzs, topology=prot_masses.top)
centers_full.save_xtc("./data/full_centers.xtc")
print("Wrote Full Cluster Centers!")
Example #6
def entry_point():

    if True:  # stage 1: load trajectories and slice to the atom subset
        # filenames
        filenames = np.sort([
            os.path.abspath(pathname)
            for pathname in glob.glob("./trajectories/*.xtc")
        ])

        print("obtained filenames!")

        # load atom indices
        pdb = md.load("prot_masses.pdb")
        iis = pdb.topology.select("backbone and resid 72 to 87")
        # iis = np.loadtxt("./atom-indices-bb.dat", dtype=int)

        # topology filename
        prot_masses = "./prot_masses.pdb"
        prot_masses = md.load(prot_masses)

        # load trjs
        print("about to load!!")
        centers = prot_masses.atom_slice(iis)
        trj_lengths, xyzs = load_as_concatenated(filenames=filenames,
                                                 atom_indices=iis,
                                                 processes=48,
                                                 top=prot_masses)
        trjs_sub = md.Trajectory(xyz=xyzs, topology=centers.top)
        del xyzs

    if True:
        # stage 2: cluster the atom-subset trajectories

        n_clusters = 10000
        #n_clusters = None
        dist_cutoff = 0.01
        clusterer = cluster.KCenters(metric=md.rmsd,
                                     cluster_radius=dist_cutoff,
                                     n_clusters=n_clusters)
        # clusterer = cluster.KHybrid(metric=md.rmsd,
        #                             cluster_radius=dist_cutoff,
        #                             n_clusters=n_clusters,
        #                             kmedoids_updates=2)
        clusterer.fit(trjs_sub)
        center_indices, distances, assignments, centers = \
            clusterer.result_.partition(trj_lengths)
        ra.save("./data/assignments.h5", assignments)
        ra.save("./data/distances.h5", distances)
        trjs_sub[clusterer.center_indices_].save_xtc("./data/centers.xtc")
        np.save("./data/center_indices.npy", clusterer.center_indices_)

        print("Done clustering!")

    if True:  # stage 3: build the MSM
        lag_time = 10  # frames; at 20 ps per frame, a 200 ps lag
        # lag_time = 1  # frames; at 20 ps per frame, a 20 ps lag
        assignments = ra.load("./data/assignments.h5")
        unique_states = np.unique(np.concatenate(assignments))
        b = partial(builders.normalize,
                    prior_counts=1 / unique_states.shape[0])
        msm_obj = MSM(lag_time=lag_time, method=b)
        msm_obj.fit(assignments)
        np.save("./data/tcounts.npy", msm_obj.tcounts_)
        np.save("./data/tprobs.npy", msm_obj.tprobs_)
        np.save("./data/populations.npy", msm_obj.eq_probs_)

        print("Done MSM!")