コード例 #1
0
ファイル: save_states.py プロジェクト: bowman-lab/fast
 def run(self, msm_dir='.'):
     """Save state structures and, optionally, bundle the centers as XTC.

     When ``self.centers`` is not ``'none'``, the assignments and distances
     under ``<msm_dir>/data`` are loaded and handed to ``save_states``
     together with the state numbers selected by ``self.centers``:

     * ``'auto'``     -- state numbers read from ``data/unique_states.npy``
     * ``'all'``      -- ``state_nums=None`` is passed through
     * ``'restarts'`` -- state numbers read from
       ``rankings/states_to_simulate_gen<gen_num>.npy``

     When ``self.save_xtc_centers`` is truthy, every PDB in
     ``<msm_dir>/centers_masses`` is loaded (sorted by filename),
     combined into one trajectory using the topology of
     ``<msm_dir>/prot_masses.pdb``, and written to
     ``<msm_dir>/data/full_centers.xtc``.

     Parameters
     ----------
     msm_dir : str, default '.'
         Root directory holding the ``data``, ``rankings`` and
         ``centers_masses`` sub-directories.

     Raises
     ------
     ValueError
         If ``self.centers`` is not one of ``'none'``, ``'auto'``,
         ``'all'`` or ``'restarts'``.
     """
     if self.centers != 'none':
         # Reject unknown modes before touching the disk; otherwise
         # `state_nums` would never be bound and the call below would die
         # with an opaque UnboundLocalError after the files were read.
         if self.centers not in ('auto', 'all', 'restarts'):
             raise ValueError(
                 "unrecognized `centers` option %r; expected one of "
                 "'none', 'auto', 'all', 'restarts'" % (self.centers,))
         assignments = ra.load(msm_dir + "/data/assignments.h5")
         distances = ra.load(msm_dir + "/data/distances.h5")
         if self.centers == 'auto':
             state_nums = np.load(msm_dir + "/data/unique_states.npy")
         elif self.centers == 'all':
             state_nums = None
         else:  # 'restarts' -- the only remaining validated option
             states_to_simulate_file = \
                 msm_dir + "/rankings/states_to_simulate_gen" + \
                 str(self.gen_num) + ".npy"
             state_nums = np.load(states_to_simulate_file)
         save_states(assignments,
                     distances,
                     state_nums=state_nums,
                     n_procs=self.n_procs,
                     largest_center=self.largest_center,
                     save_routine=self.save_routine,
                     msm_dir=msm_dir)
     if self.save_xtc_centers:
         # Sorting makes the frame order of the XTC follow filename order.
         center_filenames = np.sort(
             glob.glob("%s/centers_masses/*.pdb" % msm_dir))
         trj_lengths, xyzs = load_as_concatenated(center_filenames,
                                                  processes=self.n_procs)
         centers = md.Trajectory(xyzs,
                                 topology=md.load("%s/prot_masses.pdb" %
                                                  msm_dir).top)
         centers.save_xtc("%s/data/full_centers.xtc" % msm_dir)
コード例 #2
0
def load_assignments(assignments):
    """Return assignments, loading them from disk when given a path.

    Parameters
    ----------
    assignments : object
        Either already-loaded data (anything exposing a ``shape``
        attribute), which is returned unchanged, or a value that is
        passed to ``enspara.util.array.load`` as the file to read.

    Returns
    -------
    object
        The input itself when it has a ``shape`` attribute; otherwise
        the result of ``ra.load``.
    """
    # Already-loaded data needs neither enspara nor PyTables, so return
    # before importing them.
    if hasattr(assignments, 'shape'):
        return assignments

    # Deferred imports: only the load path depends on these packages.
    from enspara.util import array as ra
    from tables import NoSuchNodeError

    print('loading msm assignments from', assignments)
    try:
        return ra.load(assignments, keys=None)
    except NoSuchNodeError:
        # The first attempt did not find the node it looked for; retry
        # with keys=... instead.
        return ra.load(assignments, keys=...)
コード例 #3
0
def main(argv=None):
    """Compute implied timescales from assignments and save a plot.

    Parameters
    ----------
    argv : list of str, optional
        Forwarded to ``process_command_line``.

    Returns
    -------
    int
        Always 0.
    """
    args = process_command_line(argv)

    try:
        assignments = ra.load(args.assignments)
    except NoSuchNodeError:
        # Retry with keys=... when the default load cannot find its node.
        assignments = ra.load(args.assignments, keys=...)
    if args.trj_ids is not None:
        assignments = assignments[args.trj_ids]

    tscales = implied_timescales(assignments,
                                 args.lag_times,
                                 n_times=args.n_eigenvalues,
                                 sliding_window=True,
                                 trim=args.trim,
                                 method=args.symmetrization,
                                 n_procs=args.processes)

    # Select the non-interactive backend before pyplot is imported.
    import matplotlib as mpl
    mpl.use('Agg')
    from matplotlib import pyplot as plt

    unit_factor, unit_str = process_units(args.timestep, args.infer_timestep)

    # Both axes are labelled with the same unit string, so each one gets
    # exactly one conversion by unit_factor.
    lag_times = np.array(args.lag_times) / unit_factor
    tscales /= unit_factor

    for i in range(args.n_eigenvalues):
        # tscales is already scaled above; do not divide a second time.
        # The doubled braces emit a literal {...} so that multi-digit
        # indices render as one subscript.
        plt.plot(lag_times,
                 tscales[:, i],
                 label=r'$\lambda_{{{i}}}$'.format(i=i + 1))

    if args.logscale:
        plt.yscale('log')

    plt.ylabel('Eigenmotion Speed [{u}]'.format(u=unit_str))
    plt.xlabel('Lag Time [{u}]'.format(u=unit_str))
    plt.legend(frameon=False)

    plt.savefig(args.plot, dpi=300)

    return 0
コード例 #4
0
def main(argv=None):
    """Cluster a feature array and write the results to disk.

    Loads ``args.features``, fits the clustering algorithm selected by
    ``args.cluster_algorithm``, partitions the result by the feature
    array's ``lengths`` and writes distances, assignments, cluster
    centers and center indices to the paths given in ``args``.

    Parameters
    ----------
    argv : list of str, optional
        Forwarded to ``process_command_line``.

    Returns
    -------
    int
        Always 0.

    Raises
    ------
    ValueError
        If ``args.cluster_algorithm`` is neither ``'khybrid'`` nor
        ``'kcenters'``.
    """
    args = process_command_line(argv)

    try:
        features = ra.load(args.features, keys=...)
    except exception.DataInvalid:
        # Fall back to the default load when loading with keys=... is
        # rejected.
        features = ra.load(args.features)

    logger.info("Loaded data from %s with shape %s", args.features,
                features.shape)

    if args.cluster_algorithm == 'khybrid':
        clustering = KHybrid(metric=args.cluster_distance,
                             cluster_radius=args.cluster_radius,
                             kmedoids_updates=args.kmedoids_updates)
    elif args.cluster_algorithm == 'kcenters':
        clustering = KCenters(cluster_radius=args.cluster_radius,
                              metric=args.cluster_distance)
    else:
        # Without this branch `clustering` would be unbound below.
        raise ValueError(
            "unsupported cluster algorithm: %r" % (args.cluster_algorithm,))

    logger.info("Clustering with %s", clustering)

    clustering.fit(features._data)

    result = clustering.result_.partition(features.lengths)
    # Drop the reference to the input before writing the outputs.
    del features

    ra.save(args.distances, result.distances)
    logger.info("Wrote distances with shape %s to %s", result.distances.shape,
                args.distances)

    ra.save(args.assignments, result.assignments)
    # Report the path that was actually written (args.assignments).
    logger.info("Wrote assignments with shape %s to %s",
                result.assignments.shape, args.assignments)

    ra.save(args.cluster_centers, result.centers)
    logger.info("Wrote cluster_centers with shape %s to %s",
                result.centers.shape, args.cluster_centers)

    # Context manager guarantees the pickle file is flushed and closed.
    with open(args.center_indices, 'wb') as handle:
        pickle.dump(result.center_indices, handle)
    logger.info("Wrote %s center_indices with shape to %s",
                len(result.center_indices), args.center_indices)

    return 0
コード例 #5
0
ファイル: cluster.py プロジェクト: rafwiewiora/enspara
def load_features(features, stride):
    """Load feature data and the per-trajectory lengths that go with it.

    Parameters
    ----------
    features : sequence
        Feature file(s). A single entry is read with ``ra.load``; any
        other count is handed to ``mpi.io.load_npy_as_striped``.
    stride : object
        Forwarded to ``mpi.io.load_npy_as_striped``; not used when
        exactly one feature file is given.

    Returns
    -------
    tuple
        ``(lengths, data)``.
    """
    if len(features) != 1:
        with timed("Loading features took %.1f s.", logger.info):
            lengths, data = mpi.io.load_npy_as_striped(features, stride)

        # Replace the loaded array with a fresh copy of itself.
        with timed("Turned over array in %.2f min", logger.info):
            data = data.copy()

        return lengths, data

    with timed("Loading features took %.1f s.", logger.info):
        try:
            loaded = ra.load(features[0])
        except tables.exceptions.NoSuchNodeError:
            # Retry with keys=... when the default load finds no node.
            loaded = ra.load(features[0], keys=...)

    return loaded.lengths, loaded._data
コード例 #6
0
 def check_clustering(self, msm_dir, gen_num, n_kids, verbose=True):
     """Check that the assignments hold the expected number of entries.

     The expected count is ``(gen_num + 1) * n_kids``; it is compared
     with ``len()`` of the array loaded from
     ``<msm_dir>/data/assignments.h5``. A mismatch is logged at INFO
     level. ``verbose`` is accepted but not used.

     Returns
     -------
     bool
         True when the counts agree, False otherwise.
     """
     expected = (gen_num + 1) * n_kids
     found = len(ra.load(msm_dir + '/data/assignments.h5'))
     if expected == found:
         return True
     logging.info(
         "inconsistent number of trajectories between assignments and data!"
     )
     return False
コード例 #7
0
ファイル: core.py プロジェクト: bowman-lab/fast
def _prop_msm(msm_dir, msm_obj):
    """Fit the MSM on the stored assignments and write its matrices.

    Writes ``tcounts.mtx`` and ``tprobs.mtx`` (Matrix Market format) to
    ``<msm_dir>/data`` and, when ``msm_obj.eq_probs_`` is not None,
    ``populations.npy`` as well. The elapsed time is logged at INFO.

    Returns
    -------
    object
        The fitted ``msm_obj``.
    """
    started = time.time()
    data_prefix = msm_dir + '/data/'

    # Build the MSM from the assignments on disk.
    msm_obj.fit(ra.load(data_prefix + 'assignments.h5'))

    # Persist counts and probabilities; populations only if available.
    scipy.io.mmwrite(data_prefix + 'tcounts.mtx', msm_obj.tcounts_)
    scipy.io.mmwrite(data_prefix + 'tprobs.mtx', msm_obj.tprobs_)
    populations = msm_obj.eq_probs_
    if populations is not None:
        np.save(data_prefix + 'populations.npy', populations)

    elapsed = time.time() - started
    logging.info("building MSM took %0.4f seconds" % elapsed)
    return msm_obj
コード例 #8
0
def main(argv=None):
    """Run the driver script: compute implied timescales and plot them.

    This only runs when the module is executed as a script; otherwise
    the module just exposes its functions.

    Parameters
    ----------
    argv : list of str, optional
        Forwarded to ``process_command_line``.

    Returns
    -------
    int
        Always 0.
    """
    args = process_command_line(argv)

    try:
        assignments = ra.load(args.assignments)
    except NoSuchNodeError:
        # Retry with keys=... when the default load cannot find its node.
        assignments = ra.load(args.assignments, keys=...)
    if args.trj_ids is not None:
        assignments = assignments[args.trj_ids]

    tscales = implied_timescales(
        assignments, args.lag_times, n_times=args.n_eigenvalues,
        sliding_window=True, trim=args.trim,
        method=args.symmetrization, n_procs=args.processes)

    unit_factor, unit_str = process_units(args.timestep, args.infer_timestep)

    # Both axes are labelled with the same unit string, so each one gets
    # exactly one conversion by unit_factor.
    lag_times = np.array(args.lag_times) / unit_factor
    tscales /= unit_factor

    for i in range(args.n_eigenvalues):
        # tscales is already scaled above; do not divide a second time.
        # The doubled braces emit a literal {...} so that multi-digit
        # indices render as one subscript.
        plt.plot(lag_times, tscales[:, i],
                 label=r'$\lambda_{{{i}}}$'.format(i=i+1))

    if args.logscale:
        plt.yscale('log')

    plt.ylabel('Eigenmotion Speed [{u}]'.format(u=unit_str))
    plt.xlabel('Lag Time [{u}]'.format(u=unit_str))
    plt.legend(frameon=False)

    plt.savefig(args.plot, dpi=300)

    return 0
コード例 #9
0
ファイル: core.py プロジェクト: bowman-lab/fast
def _perform_analysis(analysis_obj, msm_dir, gen_num, sub_obj, q_check_obj,
                      update_data):
    """Performs analysis of cluster centers.

    Inputs
    ----------
    analysis_obj : object,
        The object used for analysis.
    msm_dir : str,
        MSM directory where analysis is performed.
    gen_num : int,
        Generation number.
    sub_obj : object,
        Submission wrapper object.
    q_check_obj : object,
        Queueing system wrapper to determine if submission is still
        running.
    update_data : bool,
        Flag for rebuilding whole analysis or analyzing a subset of
        structures.
    """
    t0 = time.time()
    # determine if there is an analysis object
    if analysis_obj is None:
        state_rankings = None
    else:
        # set the objects output
        analysis_obj.set_output(msm_dir, gen_num)
        # optionally set rebuild or continue analysis
        if hasattr(analysis_obj, 'build_full'):
            analysis_obj.build_full = update_data
        # if the output doesn't exists, pickle submit analysis
        if not os.path.exists(analysis_obj.output_name):
            _pickle_submit(msm_dir, analysis_obj, sub_obj, q_check_obj,
                           gen_num, 'analysis')
        # get rankings
        state_rankings = analysis_obj.state_rankings
        # check that everything went well
        # number of state rankings should be equal to number of state
        # in the assignments
        n_states_ranked = len(state_rankings)
        n_states = len(np.unique(ra.load(msm_dir + '/data/assignments.h5')))
        if n_states_ranked != n_states:
            raise DataInvalid(
                'The number of state rankings does not match the number ' + \
                'of states in the assignments! Analysis may have failed!')
    t1 = time.time()
    logging.info("analysis took %0.4f seconds" % (t1 - t0))
    return state_rankings
コード例 #10
0
ファイル: save_states.py プロジェクト: bowman-lab/fast
 def check_save_states(self, msm_dir):
     """Check that the expected number of structure files was written.

     The number of unique values in ``<msm_dir>/data/assignments.h5`` is
     compared with the number of ``centers_masses/*.pdb`` files (when
     ``self.save_routine`` is 'masses' or 'full') and of
     ``centers_restarts/*.gro`` files (when it is 'restarts' or 'full').
     No file counts are checked when ``self.centers`` is 'none' or
     'restarts'.

     Returns
     -------
     bool
         False if any checked file count differs from the number of
         states, True otherwise.
     """
     assigns = ra.load(msm_dir + '/data/assignments.h5')
     n_states = np.unique(assigns).shape[0]

     routine = self.save_routine
     wants_masses = routine == 'masses' or routine == 'full'
     wants_restarts = routine == 'restarts' or routine == 'full'

     # These selection modes are exempt from the file-count check.
     if self.centers == 'none' or self.centers == 'restarts':
         return True

     masses_ok = True
     restarts_ok = True
     if wants_masses:
         mass_files = glob.glob(msm_dir + '/centers_masses/*.pdb')
         masses_ok = len(mass_files) == n_states
     if wants_restarts:
         restart_files = glob.glob(msm_dir + '/centers_restarts/*.gro')
         restarts_ok = len(restart_files) == n_states
     return masses_ok and restarts_ok
コード例 #11
0
import glob
import mdtraj as md
import numpy as np
import os
from fast.msm_gen import save_states as ss
from functools import partial
from multiprocessing import Pool
from enspara import cluster
from enspara.msm import MSM, builders
from enspara.util.load import load_as_concatenated
from enspara.util import array as ra

# Script: save state structures from the stored clustering, then bundle
# the written center PDBs into a single XTC trajectory.

# Value passed to save_states as `largest_center`.
dist_cutoff = 0.01

assignments = ra.load("./data/assignments.h5")
distances = ra.load("./data/distances.h5")
ss.save_states(
    assignments,
    distances,
    save_routine='masses',
    largest_center=dist_cutoff,
    n_confs=1,
    n_procs=64,
)
# Printed once save_states has returned.
print("Saving the states!")

# The loaded PDB serves both as the `top` argument for reading the
# center files and as the source of the output trajectory's topology.
prot_masses = md.load("./prot_masses.pdb")

# Sorted so the frame order follows the filename order.
pdb_names = np.sort(glob.glob("./centers_masses/*.pdb"))
trj_lengths, xyzs = load_as_concatenated(
    pdb_names, processes=64, top=prot_masses)

centers_full = md.Trajectory(xyz=xyzs, topology=prot_masses.top)
centers_full.save_xtc("./data/full_centers.xtc")
コード例 #12
0
ファイル: build_msm.py プロジェクト: sbhakat/Plasmepsin-bace
def entry_point():
    """Cluster trajectories on a backbone selection and build an MSM.

    Runs three phases in order, all relative to the current directory:

    1. Load every ``./trajectories/*.xtc`` file, restricted to the atoms
       selected by ``"backbone and resid 72 to 87"`` in
       ``prot_masses.pdb``.
    2. Cluster the frames with ``cluster.KCenters`` using ``md.rmsd``
       and write assignments, distances, center structures and center
       indices under ``./data``.
    3. Reload the assignments, fit an ``MSM`` and write counts,
       probabilities and populations under ``./data``.

    Returns
    -------
    None
    """
    # --- 1. Load trajectories -------------------------------------------
    filenames = np.sort([
        os.path.abspath(pathname)
        for pathname in glob.glob("./trajectories/*.xtc")
    ])

    print("obtained filenames!")

    # Load the topology file once and reuse it for the atom selection,
    # as the `top` argument of the loader, and for the sliced topology.
    prot_masses = md.load("./prot_masses.pdb")
    iis = prot_masses.topology.select("backbone and resid 72 to 87")
    sub_topology = prot_masses.atom_slice(iis).top

    print("about to load!!")
    trj_lengths, xyzs = load_as_concatenated(filenames=filenames,
                                             atom_indices=iis,
                                             processes=48,
                                             top=prot_masses)
    trjs_sub = md.Trajectory(xyz=xyzs, topology=sub_topology)
    # The coordinates now live in trjs_sub; drop the extra reference.
    del xyzs

    # --- 2. Cluster -----------------------------------------------------
    n_clusters = 10000
    dist_cutoff = 0.01
    clusterer = cluster.KCenters(metric=md.rmsd,
                                 cluster_radius=dist_cutoff,
                                 n_clusters=n_clusters)
    clusterer.fit(trjs_sub)
    # Only distances and assignments of the partitioned result are used;
    # center indices are taken from the clusterer itself below.
    _, distances, assignments, _ = \
        clusterer.result_.partition(trj_lengths)
    ra.save("./data/assignments.h5", assignments)
    ra.save("./data/distances.h5", distances)
    trjs_sub[clusterer.center_indices_].save_xtc("./data/centers.xtc")
    np.save("./data/center_indices.npy", clusterer.center_indices_)

    print("Done clustering!")

    # --- 3. Build the MSM -----------------------------------------------
    # NOTE(review): the previous comment here read "20ps * 200 = 4 ns",
    # which does not match a lag time of 10 -- confirm the intended value.
    lag_time = 10
    assignments = ra.load("./data/assignments.h5")
    unique_states = np.unique(np.concatenate(assignments))
    # Prior counts are set to 1 / (number of unique states).
    b = partial(builders.normalize,
                prior_counts=1 / unique_states.shape[0])
    msm_obj = MSM(lag_time=lag_time, method=b)
    msm_obj.fit(assignments)
    np.save("./data/tcounts.npy", msm_obj.tcounts_)
    np.save("./data/tprobs.npy", msm_obj.tprobs_)
    np.save("./data/populations.npy", msm_obj.eq_probs_)

    print("Done MSM!")