Example #1
0
def main():
    """Build an MSM movie from command-line arguments.

    Reads the project, assignments and transition matrix named on the
    command line, samples a sequence of states from the transition matrix,
    pulls random conformations for that sequence out of the project, and
    saves the resulting trajectory to the requested output file.
    """
    parser = arglib.ArgumentParser(description="""
Create an MSM movie by sampling a sequence of states and sampling a 
random conformation from each state in the sequence.  
""")

    # (name, keyword options) for every argument, registered in this order.
    argument_specs = [
        ('project', {}),
        ('assignments', {'default': 'Data/Assignments.Fixed.h5'}),
        ('tprob', {'default': 'Data/tProb.mtx'}),
        ('num_steps', {}),
        ('starting_state', {
            'type': int,
            'help': '''Which state to start trajectory from.''',
        }),
        ('output', {
            'default': 'sample_traj.pdb',
            'help': """The filename of your output trajectory.  The filetype suffix will be used to select the output file format.""",
        }),
    ]
    for arg_name, arg_options in argument_specs:
        parser.add_argument(arg_name, **arg_options)
    args = parser.parse_args()

    # The assignments array is stored under 'arr_0' in some files and under
    # 'Data' in others; fall back to the second key when the first is absent.
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    n_steps = int(args.num_steps)
    first_state = int(args.starting_state)

    project = Project.load_from(args.project)
    transition_matrix = scipy.io.mmread(args.tprob).tocsr()

    state_path = msm_analysis.sample(transition_matrix, first_state, n_steps)
    # The trailing 1 is presumably "one conformation per state" -- confirm
    # against the Project API.
    sampled = project.get_random_confs_from_states(assignments, state_path, 1)

    # Reuse the first sampled trajectory object as the container and replace
    # its coordinates with the first XYZList entry of every sampled item.
    movie = sampled[0]
    movie["XYZList"] = np.array([item["XYZList"][0] for item in sampled])
    movie.save(args.output)
Example #2
0
def run(tProb, start_state, steps, project, out_pdb, out_xtc):
    """Sample a state sequence and write one random frame per visited state.

    Parameters
    ----------
    tProb : transition matrix handed to ``msm_analysis.sample``
    start_state : state the sampled sequence starts from
    steps : number of steps passed to ``msm_analysis.sample``
    project : object providing ``load_frame(traj_index, frame_index)``
    out_pdb : path the first frame is written to via ``save_to_pdb``
    out_xtc : path the whole trajectory is written to via ``save_to_xtc``

    Notes
    -----
    ``assignments`` is not a parameter; it is read from an enclosing scope.
    """
    state_traj = msm_analysis.sample(tProb, start_state, steps)
    print("Sampled tProb.")

    # Population of each state, ignoring the -1 padding entries.
    assigned_only = assignments[np.where(assignments != -1)]
    state_sizes = np.bincount(assigned_only)
    # For every step, a random member index within that step's state.
    member_picks = [np.random.randint(state_sizes[s]) for s in state_traj]

    uniq_states = np.unique(state_traj)
    # Position of each visited state inside uniq_states.
    state_lookup = dict(zip(uniq_states, np.arange(uniq_states.shape[0])))

    print("Translating to trajectory, frame pairs...")
    # One array of (traj, frame) index rows per unique visited state.
    members_by_state = [
        np.array(np.where(assignments == s)).T for s in uniq_states
    ]
    # Pick the chosen (traj, frame) row for every step of the sequence.
    which_traj = np.array([
        members_by_state[state_lookup[s]][pick]
        for s, pick in zip(state_traj, member_picks)
    ])

    print("Loading frames from the Trajectory.")

    traj = project.load_frame(which_traj[0][0], which_traj[0][1])
    for step, (traj_index, frame_index) in enumerate(which_traj[1:], 1):
        traj += project.load_frame(traj_index, frame_index)
        print(step)

    traj[0].save_to_pdb(out_pdb)
    traj.save_to_xtc(out_xtc)

    print("Saved output to %s and %s" % (out_pdb, out_xtc))
    def __init__(self):
        """Store tolerances and build a sampled trajectory from a 2x2 model."""
        max_lag = 100
        num_steps = 100000
        counts = np.array([[500, 2], [2, 50]])

        self.epsilon = 1E-7
        # Confidence for uncertainty estimate.  Testing is stochastic; we
        # expect errors 0.1 % of the time.
        self.alpha = 0.001
        self.max_lag = max_lag
        self.times = np.arange(max_lag)
        self.num_steps = num_steps

        # Transition matrix estimated from the fixed count matrix, then a
        # trajectory of num_steps sampled from it starting in state 0.
        self.C = counts
        self.T = MSMLib.estimate_transition_matrix(counts)
        self.state_traj = np.array(
            msm_analysis.sample(self.T, 0, num_steps))
Example #4
0
def main(modeldir, start, type):
    """Build (or resume) a movie trajectory for one unbound start state.

    Parameters
    ----------
    modeldir : str
        Model directory; the text before 'Data' in it is used as the project
        root for ProjectInfo.yaml and the fkbp*xtal.pdb file.
    start : int or str
        Index into the unbound-states list; converted with ``int``.
    type : str
        Substituted into the ``tpt-<type>`` directory name and the
        ``unbound_<type>_states.txt`` file name.

    Notes
    -----
    The sampled state sequence is cached in a ``.states.dat`` file and reused
    when present.  A checkpoint is written every 100 state indices.

    NOTE(review): on restart the found checkpoint file is renamed before the
    loop starts, so the first in-loop ``mv`` targets a path that no longer
    exists -- confirm this is intended.
    """
    start = int(start)
    project_root = modeldir.split('Data')[0]
    project = Project.load_from('%s/ProjectInfo.yaml' % project_root)
    pdb = glob.glob('%s/fkbp*xtal.pdb' % project_root)[0]
    unbound = numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T = mmread('%s/tProb.mtx' % modeldir)
    startstate = unbound[start]
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    steps = 100000
    print("on start state %s" % startstate)

    # Reuse a previously sampled state sequence when one is on disk.
    states_path = '%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)
    if os.path.exists(states_path):
        print("loading from states")
        traj = numpy.loadtxt(states_path)
    else:
        traj = msm_analysis.sample(T, int(startstate), int(steps))
        numpy.savetxt(states_path, traj)

    print("checking for chkpt file")
    found = glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    restart = len(found) > 0
    if restart:
        checkfile = found[0]
        movie = Trajectory.load_from_xtc(checkfile, PDBFilename=pdb)
        # The state index to resume from is encoded in the file name
        # between 'xtc.state' and 'chkpt'.
        n = int(checkfile.split('xtc.state')[1].split('chkpt')[0])
        os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
        print("checkpointing at state index %s out of %s" % (n, len(traj)))
    else:
        n = 0
        movie = project.empty_traj()

    for n in range(n, len(traj)):
        print("on state %s" % n)
        state = int(traj[n])
        # The trailing 10 is presumably the number of conformations drawn
        # for the state -- confirm against the Project API.
        t = project.get_random_confs_from_states(ass['arr_0'], [int(state),], 10)
        if n == 0:
            # First state: its coordinates start the movie.
            movie['XYZList'] = t[0]['XYZList']
            continue

        movie['XYZList'] = numpy.vstack((movie['XYZList'], t[0]['XYZList']))
        if n % 100 == 0:
            # Checkpoint: on a restarted run, move the previous checkpoint
            # aside first, then save the movie so far under the new index.
            if restart:
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            checkfile = '%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            movie.save_to_xtc(checkfile)

    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
Example #5
0
def raw_msm_correlation(trans_matrix,
                        observable,
                        assignments,
                        n_steps=10000,
                        starting_state=0):
    """Calculate an autocorrelation function from an MSM.

    Parameters
    ----------
    trans_matrix : sparse or dense matrix
        Transition matrix
    observable : np.ndarray, shape=[n_trajs, max_n_frames]
        Value of the observable in each conf
    assignments : np.ndarray, shape=[n_trajs, max_n_frames]
        State membership for each conf
    n_steps : int
        Number of steps to simulate
    starting_state : int
        State to start the trajectory from

    Notes
    -----
    This function works by first generating a 'sample' trajectory from the
    MSM. This approach is necessary as it allows treatment of intra-state
    dynamics.

    For every step one observable value is drawn from the conformations
    assigned to that step's state.  The draw picks a random integer below
    ``assignments.size`` and wraps it onto the state's members
    (``take(..., mode='wrap')``).  The members of each state are looked up
    once and cached, instead of rescanning ``assignments`` at every step.

    NOTE(review): each draw uses a one-element index list, so the entries of
    ``obs_traj`` are length-1 arrays rather than scalars -- confirm that
    ``fft_acf`` expects this.

    Returns
    -------
    correlation : np.ndarray
        The autocorrelation of the observable, as computed by ``fft_acf``
    traj : sequence of states
        The simulated trajectory returned by ``msm_analysis.sample``
    obs_traj : np.ndarray
        The observable signal, one draw per entry of traj
    """
    traj = msm_analysis.sample(trans_matrix, starting_state, n_steps)

    # Upper bound for the random index.  Any bound works because the index
    # is wrapped onto the state's member count; the total number of
    # assignment entries matches the original choice, but as a plain int.
    max_int = int(assignments.size)

    # state -> 1-D array of observable values for conformations in that state
    values_by_state = {}
    draws = []
    for state in traj:
        if state not in values_by_state:
            values_by_state[state] = observable[np.where(assignments == state)]
        draws.append(
            values_by_state[state].take(
                [np.random.randint(max_int)], mode='wrap'))
    obs_traj = np.array(draws)

    corr = fft_acf(obs_traj)

    return corr, traj, obs_traj
Example #6
0
def raw_msm_correlation(trans_matrix, observable, assignments, n_steps=10000, starting_state=0):
    """Calculate an autocorrelation function from an MSM.

    Parameters
    ----------
    trans_matrix : sparse or dense matrix
        Transition matrix
    observable : np.ndarray, shape=[n_trajs, max_n_frames]
        Value of the observable in each conf
    assignments : np.ndarray, shape=[n_trajs, max_n_frames]
        State membership for each conf
    n_steps : int
        Number of steps to simulate
    starting_state : int
        State to start the trajectory from

    Notes
    -----
    This function works by first generating a 'sample' trajectory from the
    MSM. This approach is necessary as it allows treatment of intra-state
    dynamics.

    At each step a random integer below ``assignments.size`` is wrapped onto
    the conformations assigned to the current state (``mode='wrap'``) to pick
    one observable value.  Per-state members are computed once and reused, so
    ``assignments`` is scanned once per distinct state rather than once per
    step.

    NOTE(review): every draw is a length-1 array (one-element index list
    passed to ``take``), not a scalar -- confirm ``fft_acf`` expects this.

    Returns
    -------
    correlation : np.ndarray
        The autocorrelation of the observable, as computed by ``fft_acf``
    traj : sequence of states
        The simulated trajectory returned by ``msm_analysis.sample``
    obs_traj : np.ndarray
        The observable signal, one draw per entry of traj
    """
    traj = msm_analysis.sample(trans_matrix, starting_state, n_steps)

    # Integer bound for the random draw; equals the number of entries in
    # assignments, which is what summing an all-ones array of that shape gave.
    max_int = int(assignments.size)

    members = {}  # state -> observable values of the confs assigned to it

    def _draw(state):
        # Look the state's members up on first use only.
        if state not in members:
            members[state] = observable[np.where(assignments == state)]
        return members[state].take([np.random.randint(max_int)], mode='wrap')

    obs_traj = np.array([_draw(state) for state in traj])

    corr = fft_acf(obs_traj)

    return corr, traj, obs_traj
Example #7
0
def main():
    """Sample a state sequence from an MSM and save it as a movie.

    Uses the module-level ``parser`` for the command line, loads the
    assignments, project and transition matrix it names, and writes the
    sampled conformations to ``args.output``.
    """
    args = parser.parse_args()

    # Assignments live under 'arr_0' in some files and 'Data' in others.
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    n_steps = int(args.num_steps)
    first_state = int(args.starting_state)

    project = Project.load_from(args.project)
    transition_matrix = scipy.io.mmread(args.tprob).tocsr()

    state_path = msm_analysis.sample(transition_matrix, first_state, n_steps)
    sampled = project.get_random_confs_from_states(assignments, state_path, 1)

    # The first sampled item doubles as the output container; its
    # coordinates are replaced by the first XYZList entry of every item.
    movie = sampled[0]
    movie["XYZList"] = np.array([item["XYZList"][0] for item in sampled])
    movie.save(args.output)
def entry_point():
    """Command-line entry: sample states from an MSM and write a movie.

    Arguments come from the module-level ``parser``.  The output trajectory
    is saved to ``args.output``.
    """
    args = parser.parse_args()

    # Try the 'arr_0' dataset first and fall back to 'Data' if it is missing.
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    step_count = int(args.num_steps)
    initial_state = int(args.starting_state)

    project = Project.load_from(args.project)
    tprob = scipy.io.mmread(args.tprob).tocsr()

    states = msm_analysis.sample(tprob, initial_state, step_count)
    confs = project.get_random_confs_from_states(assignments, states, 1)

    # Collect the first XYZList entry of each sampled item into the first
    # item, which is then saved as the movie.
    output_traj = confs[0]
    output_traj["XYZList"] = np.array([conf["XYZList"][0] for conf in confs])
    output_traj.save(args.output)
# (waypoint, source, sink) combinations to test
triples = [(0, 1, sink) for sink in (2, 3, 4)]

# Load the transition matrix: drop the last three columns of the text file
# and transpose what remains.
N = 11  # presumably the number of states; bounds the random start state below
T = np.transpose(np.genfromtxt('mat_1.dat')[:, :-3])
for summary in (T, T.shape, T.sum(1)):
    print(summary)

# Sample a trajectory from it, starting in a random state below N.
print("Making a len: %d traj" % steps)
traj = msm_analysis.sample(T, np.random.randint(N), steps, force_dense=True)
print("Generated traj")

# Counters and flags for counting the fraction of visits.
n_visited_waypoint = 0
n_notvisited_waypoint = 0

started = False
visited = False

for (waypoint, source, sink) in triples:

    for n, i in enumerate(traj):

        if n % 10000 == 0:
Example #10
0
steps = 10**6  # number of steps to sample

# (waypoint, source, sink) combinations to test
triples = [
    (0, 1, 2),
    (0, 1, 3),
    (0, 1, 4),
]

# Load the transition matrix: drop the last three columns of the text file
# and transpose what remains.
N = 11  # presumably the number of states; bounds the random start state below
T = np.transpose(np.genfromtxt('mat_1.dat')[:, :-3])
print(T)
print(T.shape)
print(T.sum(1))

# Sample a trajectory from it, starting in a random state below N.
print("Making a len: %d traj" % steps)
traj = msm_analysis.sample(T, np.random.randint(N), steps, force_dense=True)
print("Generated traj")

# Counters and flags for counting the fraction of visits.
n_visited_waypoint = 0
n_notvisited_waypoint = 0

started = False
visited = False

for (waypoint, source, sink) in triples:
    
    for n,i in enumerate(traj):
        
        if n % 10000 == 0:
Example #11
0
"""

import os
import sys
import numpy as np
import scipy.sparse

from msmbuilder import io, msm_analysis, MSMLib
from bayesmutant import SimpleMutantSampler

# Base transition matrix read from disk.
P = np.loadtxt('base_transition_matrix.dat')

# Mutant matrix: the base matrix plus sparse random noise (density 0.1,
# scaled by 0.2), then divided in place by its axis=1 sums.
mutant_transition_matrix = P + 0.2 * scipy.sparse.rand(
    P.shape[0], P.shape[1], density=0.1
).todense()
mutant_transition_matrix /= np.sum(mutant_transition_matrix, axis=1)

# Counts observed along a 5000-step trajectory sampled from the base matrix,
# starting in state 0.
trajectory = np.array(msm_analysis.sample(P, 0, 5000))
base_counts = MSMLib.get_counts_from_traj(trajectory).todense()

print('base counts')
print(base_counts)

# Run the mutant sampler for 5000 steps.
ms = SimpleMutantSampler(base_counts, mutant_transition_matrix)
ms.step(5000)

print('observed counts')
print(ms.counts)

# Save the inputs and everything the sampler produced.
io.saveh(
    'sampling.h5',
    base_counts=base_counts,
    samples=ms.samples,
    observed_counts=ms.counts,
    scores=ms.scores,
    transition_matrix=mutant_transition_matrix,
)
Example #12
0
    print "Cannot load msm data."

tProb = mmread(args.tProb)

# The raw data is stored under 'arr_0' in some files and 'Data' in others.
# Only a missing key triggers the fallback; other errors propagate.
try:
    raw_data = io.loadh(args.raw_data)["arr_0"]
except KeyError:
    raw_data = io.loadh(args.raw_data)["Data"]

num_frames = raw_data.shape[1]

# Floor division keeps the step count an integer on Python 3 as well as on
# Python 2.
num_lagtimes = num_frames // args.lagtime

# msm_acf = msm_analysis.msm_acf(tProb, msm_data, np.arange(num_lagtimes),
#                               num_modes=args.num_modes)

# Autocorrelation of the MSM: sample a state trajectory from a random start
# state, map it through msm_data, and autocorrelate the result.
sampled_traj = msm_analysis.sample(tProb, np.random.randint(tProb.shape[0]), num_lagtimes)
data_traj = msm_data[sampled_traj]
msm_acf = autocorrelate.fft_autocorrelate(data_traj)

# Autocorrelation of every raw trajectory.  The loop bound used to be
# max(n_rows, 10), which indexed past the end of raw_data whenever there were
# fewer than 10 rows; for 10 or more rows it already covered exactly n_rows.
# NOTE(review): a cap of min(n_rows, 10) may have been the original intent --
# confirm.
raw_acfs = []
for i in range(raw_data.shape[0]):

    # Trim the trailing -1 padding: keep everything up to the last entry
    # that is not -1.
    max_non_neg = np.where(raw_data[i] != -1)[0].max()
    row = raw_data[i][: max_non_neg + 1]

    raw_acfs.append(autocorrelate.fft_autocorrelate(row))

figure()
axes((0.18, 0.18, 0.72, 0.72))

raw_label = "Raw Data"
def _run_trial(arg_dict):
    """Run one simulated sampling trial and score the resulting models.

    Every key of ``arg_dict`` is injected as a local variable via ``exec``.
    The body then reads the following names without assigning them first:
    ``rounds_of_sampling``, ``simultaneous_samplers``,
    ``size_of_intial_data``, ``length_of_sampling_trajs``,
    ``transition_matrix``, ``num_states``, ``observable_function`` and
    ``SamplerObject``.  Presumably they all arrive through ``arg_dict`` (or
    module globals) -- TODO confirm against the caller.

    Parameters
    ----------
    arg_dict : dict
        Maps variable names (strings) to the values they should take inside
        this function.

    Returns
    -------
    distance_to_target : np.ndarray, shape=[rounds_of_sampling]
        Per round: square root of the summed squared ``.data`` entries of
        ``T_pred - T``, divided by ``num_states``.
    obs_distance : np.ndarray, shape=[rounds_of_sampling]
        Per round: ``abs(observable_function(T_mod) - obs_goal)``; left at
        zero when ``observable_function`` is falsy.
    """

    # inject the arg_dict into the local namespace - may be a bad idea...
    # NOTE(review): creating function locals through exec relies on Python 2
    # semantics (this function also uses the Python 2 `print >>` form below);
    # under Python 3 exec() cannot bind new function locals -- confirm the
    # target interpreter before porting.
    for key in arg_dict.keys():
        exec(key + " = arg_dict['" + key + "']")

    # initialize data structures to hold output
    distance_to_target = np.zeros(rounds_of_sampling)
    obs_distance = np.zeros(rounds_of_sampling)

    # the assignments array will hold all of the output of all simulations:
    # row 0 is the initial data, followed by one row per sampler per round.
    # Unfilled entries stay at -1.
    assignments = -1.0 * np.ones((rounds_of_sampling * simultaneous_samplers + 1,
                                      max(size_of_intial_data, length_of_sampling_trajs+1) ))

    # initialize the "true" transition matrix
    # NOTE(review): `not transition_matrix` evaluates the truthiness of the
    # argument; for a multi-element NumPy array that raises ValueError, so an
    # `is None` test may have been intended -- confirm.
    if not transition_matrix:
        # No matrix supplied: build one from a random symmetric count matrix.
        assert num_states > 0
        C_rand = np.random.randint( 0, 100, (num_states, num_states) )
        C_rand += C_rand.T
        T = MSMLib.estimate_transition_matrix( C_rand )
    else:
        T = transition_matrix
        num_states = T.shape[0]
    T = sparse.csr_matrix(T)
    msm_analysis.check_transition(T)
        
    # Reference value of the observable on the "true" matrix.  obs_goal is
    # only bound when observable_function is truthy, and is only read under
    # that same condition further down.
    if observable_function:
        try:
            obs_goal = observable_function(T)
        except Exception as e:
            print >> sys.stderr, e
            raise Exception("Error evaluating function: %s" % observable_function.__name__)
            
    # Seed row 0 with an initial trajectory; the start state is passed as None.
    assignments[0,:size_of_intial_data] = msm_analysis.sample(T, None, size_of_intial_data)

    # iterate, adding simulation time
    for sampling_round in range(rounds_of_sampling):
        
        # apply the adaptive sampling method - we need to be true to what a
        # real simulation would actually see for the counts matrix
        # (work on a copy so the renumbering does not touch `assignments`)
        mod_assignments = assignments.copy()
        mapping = MSMLib.renumber_states( mod_assignments )
        C_mod = MSMLib.get_count_matrix_from_assignments( mod_assignments )
        T_mod = MSMLib.estimate_transition_matrix(C_mod)
        adaptive_sampling_multivariate = SamplerObject.sample(C_mod)

        # choose the states to sample from (in the original indexing):
        # draw renumbered state indices with the sampler's weights, then map
        # them back through `mapping`
        state_inds = np.arange(len(adaptive_sampling_multivariate))
        sampler = stats.rv_discrete(name='sampler', 
                                    values=[state_inds, adaptive_sampling_multivariate])
        starting_states = sampler.rvs( size=simultaneous_samplers )
        starting_states = mapping[starting_states]

        # start new 'simulations' in each of those states
        for i,init_state in enumerate(starting_states):
            # row for this sampler in this round (row 0 is the initial data)
            a_ind = sampling_round * simultaneous_samplers + i + 1
            s_ind = length_of_sampling_trajs + 1
            assignments[a_ind,:s_ind] = msm_analysis.sample(T, init_state, s_ind)

        # build a new MSM from all the simulation so far
        C_raw = MSMLib.get_count_matrix_from_assignments( assignments, n_states=num_states )
        C_raw = C_raw + C_raw.T # might want to add trimming, etc.
        T_pred = MSMLib.estimate_transition_matrix(C_raw) 

        # calculate the error between the real transition matrix and our best prediction
        assert T.shape == T_pred.shape
        distance_to_target[sampling_round] = np.sqrt( ((T_pred - T).data ** 2).sum() ) \
                                             / float(num_states)

        # The observable error is evaluated on T_mod (built from the
        # renumbered assignments before this round's new rows), not on T_pred.
        if observable_function:
            obs_distance[sampling_round] = np.abs(observable_function(T_mod) - obs_goal)

    return distance_to_target, obs_distance