Esempio n. 1
0
    def test_2(self):
        """Integration test: run AssignParallel against a live ipcluster.

        Starts a single-engine ipcluster, runs the parallel assignment, and
        compares the resulting assignments/distances against the reference
        fixtures. Cluster and temp dir are torn down in ``finally``.
        """
        output_dir = None
        try:
            subprocess.Popen('ipcluster start --cluster-id=testclusterid --n=1 --daemonize', shell=True)
            time.sleep(5)  # give the cluster time to come up

            args = self.Args()
            output_dir = args.output_dir = tempfile.mkdtemp()
            args.cluster_id = 'testclusterid'

            logger = AssignParallel.setup_logger()
            AssignParallel.main(args, self.metric, logger)

            assignments = Serializer.load_data(os.path.join(args.output_dir, 'Assignments.h5'))
            r_assignments = Serializer.load_data(os.path.join(fixtures_dir(), 'Assignments.h5'))
            distances = Serializer.load_data(os.path.join(args.output_dir, 'Assignments.h5.distances'))
            r_distances = Serializer.load_data(os.path.join(fixtures_dir(), 'Assignments.h5.distances'))

            npt.assert_array_equal(assignments, r_assignments)
            npt.assert_array_almost_equal(distances, r_distances)
        finally:
            # The original 'except: raise' was a no-op; try/finally alone is
            # equivalent. Clean up only if the temp dir was created, so a
            # failure in setup isn't masked by an UnboundLocalError here.
            if output_dir is not None:
                shutil.rmtree(output_dir)
            subprocess.Popen('ipcluster stop', shell=True).wait()
Esempio n. 2
0
    def test_2(self):
        """Integration test: run AssignParallel against a live ipcluster
        and compare its output to the reference fixtures."""
        output_dir = None
        try:
            subprocess.Popen(
                'ipcluster start --cluster-id=testclusterid --n=1 --daemonize',
                shell=True)
            time.sleep(5)  # give the cluster time to come up

            args = self.Args()
            output_dir = args.output_dir = tempfile.mkdtemp()
            args.cluster_id = 'testclusterid'

            logger = AssignParallel.setup_logger()
            AssignParallel.main(args, self.metric, logger)

            assignments = Serializer.load_data(
                os.path.join(args.output_dir, 'Assignments.h5'))
            r_assignments = Serializer.load_data(
                os.path.join(fixtures_dir(), 'Assignments.h5'))
            distances = Serializer.load_data(
                os.path.join(args.output_dir, 'Assignments.h5.distances'))
            r_distances = Serializer.load_data(
                os.path.join(fixtures_dir(), 'Assignments.h5.distances'))

            npt.assert_array_equal(assignments, r_assignments)
            npt.assert_array_almost_equal(distances, r_distances)
        finally:
            # 'except: raise' removed as a no-op. Guard the cleanup: if
            # self.Args()/mkdtemp failed, args.output_dir never existed and
            # the original finally would raise, hiding the real exception.
            if output_dir is not None:
                shutil.rmtree(output_dir)
            subprocess.Popen('ipcluster stop', shell=True).wait()
Esempio n. 3
0
 def save_container(filename, dtype):
     """Create and save the on-disk checkpoint container.

     Stores the (all -1) data array, a per-vtraj completion bitmap and the
     trajectory hashes so an interrupted run can be resumed and validated.
     """
     s = Serializer({
         'Data': np.array(minus_ones, dtype=dtype),
         # builtin bool: 'np.bool' was a deprecated alias removed in NumPy 1.24
         'completed_vtrajs': np.zeros((n_vtrajs), dtype=bool),
         'hashes': hashes
     })
     s.save_to_hdf(filename)
    def __init__(self, information, projectfile, populationfile,
                 assignmentfile_fixed, tmatrixfile, rawdatafile):
        """Load every input the analysis needs, failing loudly if any is missing.

        information: free-form description, stored as-is on the instance.
        projectfile, assignmentfile_fixed: HDF files read via Serializer.
        populationfile: plain-text table read with loadtxt.
        tmatrixfile: matrix-market sparse transition matrix (mmread).
        rawdatafile: delegated to self.getrawdata (HDF or text fallback).
        """
        try:
            self.Info = information
            self.ProjectInfo = Serializer.LoadFromHDF(projectfile)
            self.Population = loadtxt(populationfile)
            self.Assignments = Serializer.LoadFromHDF(assignmentfile_fixed)
            self.Tmatrix = mmread(tmatrixfile)
            # Must run after self.Assignments is set, just above.
            self.StateAssignment = hct.get_StatesAssignments(self.Assignments)
            self.getrawdata(rawdatafile)
        except:
            # Deliberate catch-all: print a hint, then re-raise the original.
            print "Having trouble with getting required files"
            raise
Esempio n. 5
0
def CalculateProjectRg(ProjectInfo, Output, returnRgs=False):
    """
    Calculate Radius of gyration for the Project ie. all the Trajectories.
    ProjectInfo: path to the ProjectInfo.h5 file (must be a string path).
    Output: output file (XXX.dat). 
    The Output default will be set in the scripts and it is './Rgs.dat'.
    returnRgs: if True, also return the computed Rg array.
    """
    Output = checkoutput(Output)

    # ProjectInfo must be a *path* here; the variable is rebound to the
    # loaded serializer further down.
    if not isinstance(ProjectInfo, str):
        print "Please input the Path to ProjectInfo.h5"
        raise IOError
    print 'Calculating the Rg for each trajectory......'
    # chdir into the directory holding ProjectInfo.h5 so the relative
    # trajectory paths stored inside it resolve. NOTE: side effect — the
    # process working directory changes for the caller too.
    ProjectInfoPath = '/'.join(os.path.realpath(ProjectInfo).split('/')[:-1])
    os.chdir(ProjectInfoPath)
    Trajfiles = []
    ProjectInfo = Serializer.LoadFromHDF(ProjectInfo)
    # Reconstruct each trajectory filename as <path><basename><index><type>.
    for i in range(ProjectInfo['NumTrajs']):
        Trajfiles.append(ProjectInfo['TrajFilePath'] +
                         ProjectInfo['TrajFileBaseName'] + '%d' % i +
                         ProjectInfo['TrajFileType'])
    Rgs = computeRg(Trajfiles)

    print "Save data to %s" % Output
    savetxt(Output, Rgs)
    print "Done."
    if returnRgs:
        return Rgs
Esempio n. 6
0
def main():
    """Scatter-plot the (x, y) coordinate of atom 0 in every frame of every
    trajectory, colored by the MSM state each frame was assigned to."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--project', default='ProjectInfo.h5')
    parser.add_argument('-a', '--assignments', default='Data/Assignments.h5')
    args = parser.parse_args()

    a = Serializer.LoadData(args.assignments)
    p = Project.LoadFromHDF(args.project)
    # (dropped dead locals maxx/maxy/minx/miny: they were initialized but
    # never used — plot_v below recomputes the extents directly)
    n_states = np.max(a) + 1

    # Atom 0's x and y coordinate for every frame, all trajectories flattened.
    x = np.concatenate([p.LoadTraj(i)['XYZList'][:, 0, 0] for i in range(p['NumTrajs'])])
    y = np.concatenate([p.LoadTraj(i)['XYZList'][:, 0, 1] for i in range(p['NumTrajs'])])
    a = np.concatenate([a[i, :] for i in range(p['NumTrajs'])])

    plot_v(minx=np.min(x), maxx=np.max(x), miny=np.min(y), maxy=np.max(y))
    colors = ['b', 'r', 'm', 'c', 'g']
    for j in xrange(n_states):
        w = np.where(a == j)[0]
        # Cycle through the palette so more than len(colors) states no
        # longer raises IndexError (same colors as before for j < 5).
        c = colors[j % len(colors)]
        pp.scatter(x[w], y[w], marker='x', c=c, label='State %d' % j,
                   edgecolor=c, alpha=0.5)

    pp.legend()
    pp.show()
Esempio n. 7
0
def save_new_ref(filename, data):
    """Saves a new version of the reference data, and backs up the old.

    The format is chosen from the filename extension: h5/lh5 (dense array
    via Serializer), mtx (matrix market) or pkl (pickle). An existing file
    is moved into BACKUP_DIR first. Raises ValueError on unknown extensions.
    """
    ext = filename.split('.')[-1]

    # 'is None', not '== None': with array-like data '==' is elementwise
    # and its truth value is ambiguous.
    if data is None:
        print("WARNING: Error generating file: %s" % filename)
        print("Skipped... try again.")
        return

    if os.path.exists(filename):
        os.system('mv %s %s' % (filename, BACKUP_DIR))

    if ext in ['h5', 'lh5']:
        if scipy.sparse.issparse(data):
            data = data.toarray()
        Serializer.SaveData(filename, data)
    elif ext == 'mtx':
        io.mmwrite(filename, data)
    elif ext == 'pkl':
        # Binary mode, and pickle.dump(obj, file): the original called
        # pickle.dump(f, data), which pickled the *file handle* into `data`.
        with open(filename, 'wb') as f:
            pickle.dump(data, f)
    else:
        raise ValueError('Could not understand extension (.%s) for %s' %
                         (ext, filename))

    return
Esempio n. 8
0
    def check_container(filename):
        """Validate an existing checkpoint file against the current project.

        Raises ValueError when the stored vtraj count or the stored
        trajectory hashes disagree with this run.
        """
        ondisk = Serializer.load_from_hdf(filename)
        stored = len(ondisk['hashes'])
        if stored != n_vtrajs:
            raise ValueError('You asked for {} vtrajs, but your checkpoint file has {}'.format(n_vtrajs, stored))
        hashes_match = np.all(ondisk['hashes'] == hashes)
        if not hashes_match:
            raise ValueError('Hash mismatch. Are these checkpoint files for the right project?')
Esempio n. 9
0
def get_projectinfo():
    """Load and return ../ProjectInfo.h5.

    On IOError the original printed a hint, swallowed the exception and then
    crashed with UnboundLocalError at the return; re-raising preserves the
    real failure instead.
    """
    try:
        projectinfo = Serializer.LoadFromHDF('../ProjectInfo.h5')
    except IOError:
        print("Can't find ProjectInfo.h5!")
        raise

    return projectinfo
Esempio n. 10
0
    def check_container(filename):
        """Sanity-check a checkpoint file: right vtraj count, right hashes.

        Raises ValueError if either check fails.
        """
        ondisk = Serializer.load_from_hdf(filename)
        n_stored = len(ondisk['hashes'])
        if n_stored != n_vtrajs:
            msg = 'You asked for {} vtrajs, but your checkpoint file has {}'
            raise ValueError(msg.format(n_vtrajs, n_stored))
        if not np.all(ondisk['hashes'] == hashes):
            raise ValueError('Hash mismatch. Are these checkpoint files for the right project?')
Esempio n. 11
0
def get_Trajectory_frame(trajid, frames,
                         path="/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories"):
    """
    Get trajectory frames.
    From Trajectory file(trj<id>.lh5) get the frame.

    trajid: integer trajectory id (file trj<trajid>.lh5 under ``path``).
    frames: iterable of frame indices to extract.
    path: trajectory directory. Previously a hard-coded local constant;
          now an overridable keyword with the same default, so existing
          callers are unaffected.
    """
    traj = Serializer.LoadFromHDF('%s/trj%d.lh5' % (path, trajid))
    return [traj['XYZList'][i] for i in frames]
Esempio n. 12
0
def pseudosampling(states, assignment, numberoftrajs, frames, output):
    """For each requested state, build pseudo-trajectories from the
    assignment file and write them to ``output``; exits if the
    assignment file cannot be loaded."""
    try:
        fn = Serializer.LoadFromHDF(assignment)
    except IOError:
        print("Can't find Assignment file")
        sys.exit()
    for stateid in states:
        pseudo = pseudotrajs(numberoftrajs, stateid, fn, frames)
        pseudo.SaveToHDF(output)
        print("Wrote:%s" % output)
def calculate_statepopulation_rawdata(AssignmentsFixed):
    """Return the normalized state populations of an assignments file.

    Counts how often each state id (0 .. max) occurs across all
    trajectories and normalizes to a probability vector of length max+1.
    """
    a = Serializer.LoadFromHDF(AssignmentsFixed)
    statenumber = max([max(a['Data'][i]) for i in range(len(a['Data']))]) + 1
    # One O(total_frames) counting pass instead of the original
    # O(states * total_frames) tolist().count() scan per state. Negative
    # labels (e.g. -1 padding) were never counted by the original either,
    # so they are filtered out before bincount.
    flat = np.concatenate([np.asarray(traj).ravel() for traj in a['Data']])
    flat = flat[flat >= 0]
    p = np.bincount(flat.astype(int), minlength=int(statenumber)).astype(float)
    p = p / p.sum()
    return p
    def getrawdata(self, rawdatafile):
        """Load self.RawData from ``rawdatafile``.

        Tries the HDF container first (its 'Data' key), then falls back to
        a plain-text table; re-raises if both attempts fail.
        """
        try:
            d = Serializer.LoadFromHDF(rawdatafile)
            self.RawData = d['Data']
        except Exception:
            # Narrowed from bare 'except:', which also swallowed
            # SystemExit/KeyboardInterrupt; the intended "try the other
            # format" fallback is preserved.
            try:
                self.RawData = loadtxt(rawdatafile)
            except Exception:
                print("Can not load {}".format(rawdatafile))
                raise
Esempio n. 15
0
def get_StatesAssignments(AssignmentFiles):
    """
    StatesAssignments {'state':{'Trajectory':[Frames]}}

    Build, for every state, a mapping from trajectory id to the list of
    frame indices assigned to that state. Unassigned frames (-1) are
    skipped. An Assignments container lacking the 'TrajID' key (a regular
    file rather than a bootstrap one) is patched in place with sequential
    ids and reprocessed.
    """
    A = AssignmentFiles
    if isinstance(A, str):
        A = Serializer.LoadFromHDF(A)
    result = {}
    try:
        for trajid, data in zip(A['TrajID'], A['Data']):
            traj_key = '%d' % trajid
            for frame, state in enumerate(data):
                if state == -1:
                    continue
                per_traj = result.setdefault('%d' % state, {})
                per_traj.setdefault(traj_key, []).append(frame)
        return result
    except KeyError:
        A['TrajID'] = list(range(len(A['Data'])))
        return get_StatesAssignments(A)
Esempio n. 16
0
def test():
    """Plot mean +/- std of the helix count for every MSM state and save
    the figure as 'Numhelix_states'."""
    assignments = Serializer.LoadFromHDF("/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/Data/Assignments.h5")
    states_assignments = get_StatesAssignments(assignments)
    numhelix_by_state = compute_numhelix_states(states_assignments)
    ordered_states = sorted(int(i) for i in numhelix_by_state.keys())
    mean_numhelix_states = [np.mean(numhelix_by_state['%d' % s]) for s in ordered_states]
    std_numhelix_states = [np.std(numhelix_by_state['%d' % s]) for s in ordered_states]

    plt.figure()
    plt.errorbar(ordered_states, mean_numhelix_states, std_numhelix_states)
    plt.xlabel("State ID")
    plt.ylabel("Number of Helix")
    plt.savefig("Numhelix_states")
    plt.show()
Esempio n. 17
0
def bootstrap(AssignmentsFile, numtraj, bootstrapnumber, PathtoSaveFiles):
    """Generate ``bootstrapnumber`` bootstrap resamples of an assignments file.

    Each resample draws ``numtraj`` trajectories with replacement ('all'
    selects every trajectory) and the results, with the sampled trajectory
    ids, are handed to SaveBootstrapFiles.
    """
    bootstraplist, TrajID = [], []
    File = Serializer.LoadFromHDF(AssignmentsFile)
    datalist = File['Data']
    if isinstance(numtraj, str) and numtraj.lower() == 'all':
        numtraj = len(datalist)
    elif int(numtraj) <= 0:
        print("Please input valid number from 1 to %d" % len(datalist))
        sys.exit()
    else:
        numtraj = int(numtraj)

    for i in range(bootstrapnumber):
        # builtin int: 'np.int' was a deprecated alias removed in NumPy 1.24
        all_assignments = -1 * np.ones((numtraj, len(datalist[0])), dtype=int)
        k = 0
        trajid = []
        for j, m in sample_with_replacement(datalist, numtraj):
            all_assignments[k][:] = j[:]
            trajid.append(m)
            k += 1
        TrajID.append(trajid)
        bootstraplist.append(all_assignments)
    SaveBootstrapFiles(bootstraplist, TrajID, PathtoSaveFiles, File, AssignmentsFile)
Esempio n. 18
0
def compute_numhelix_states(StatesAssignments,
                            traj_file_fmt='/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj%s_hc.h5'):
    """
    Compute the average number of helix for all states.
    Need Path to trj_hc.h5.

    StatesAssignments: {'state': {'trajid': [frames]}} mapping.
    traj_file_fmt: '%s'-style template for the per-trajectory _hc.h5 file.
        Previously a hard-coded constant; same default, so callers are
        unaffected.
    Returns {'state': [helix counts of every frame in that state]}.
    """
    SA = StatesAssignments
    states = SA.keys()
    numhelix_states = {}
    n = 0
    for state in states:
        n += 1
        print("Compute number of helix for state %d/%d" % (n, len(states)))
        TrajID = SA[state].keys()
        numhelix_state = []
        for trajid in TrajID:
            T = {}
            Traj = Serializer.LoadFromHDF(traj_file_fmt % trajid)
            # Keep only the helix codes of the frames assigned to this state.
            T['HCs'] = [Traj['HCs'][i] for i in SA[state][trajid]]
            numhelix_state += count_Helix(T)
        numhelix_states[state] = numhelix_state

    return numhelix_states
        x = (t['XYZList'][i, Atom1, :] - t['XYZList'][i, Atom2, :])[0]
        x = x.tolist()
        distance.append(np.dot(x, x)**0.5)
    distance += [-1] * (LongestTrajLength - len(t['XYZList']))
    return distance


#------------MAIN---------------

AtomName1 = 'C'
ResidueID1 = 1
AtomName2 = 'N'
ResidueID2 = 23
path = '/Users/tud51931/projects/MSM/msm/ff03ERR-hybridkcenter/RMSDCluster4.0'
Distances = []
ProjectInfo = Serializer.LoadFromHDF('%s/ProjectInfo.h5' % path)
LongestTrajLength = max(ProjectInfo['TrajLengths'])
os.chdir(path)
if os.path.exists('EndtoEndDistances.dat'):
    print "EndtoEndDistances.dat exists!"
    sys.exit()
print 'Calculating the eeDistance of each trajectory......'
for i in range(ProjectInfo['NumTrajs']):
    trajfile = ProjectInfo['TrajFilePath'] + ProjectInfo[
        'TrajFileBaseName'] + '%d' % i + ProjectInfo['TrajFileType']
    print '%d in %d Trajectories' % (i, ProjectInfo['NumTrajs']), trajfile
    d = calculatedistance(AtomName1, ResidueID1, AtomName2, ResidueID2,
                          trajfile, LongestTrajLength)
    Distances.append(d)
print "Save data to ./EndtoEndDistance.dat"
savetxt('EndtoEndDistances.dat', Distances)
Esempio n. 20
0
from msmbuilder import Serializer


def draw_index(probs, n_picks=1, UseFastMethod=True):
    """Draw a number (or many numbers, controlled by n_picks), weighted by
    the probabilities probs.

    probs need not be normalized. Returns an array of n_picks indices.
    """
    t = np.cumsum(probs)
    s = sum(probs)
    if UseFastMethod:
        # Vectorized inverse-CDF sampling.
        return np.searchsorted(t, np.random.rand(n_picks) * s)
    # Slow path: the original silently returned None here. Do the same
    # inverse-CDF draw one pick at a time instead.
    return np.array([int(np.searchsorted(t, np.random.rand() * s))
                     for _ in range(n_picks)])


# Generate `trajnum` synthetic trajectories by Monte-Carlo sampling the raw
# transition counts, starting every trajectory from state 126.
tcounts = mmread(
    '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/lagtime50/tCounts.UnMapped.mtx'
)
Assignment = Serializer.LoadFromHDF(
    '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/lagtime50/Assignments.Fixed.h5'
)
trajnum = 100
# Synthetic trajectories get the same number of frames as the real ones.
frames = Assignment['Data'].shape[1]
a = Serializer()
a['Data'] = -1 * np.ones((trajnum, frames))
for traj in range(trajnum):
    print '%d of %d Trajectories' % (traj, trajnum)
    startstate = 126
    a['Data'][traj, 0] = startstate
    for step in range(1, frames):
        # Outgoing transition probabilities of the current state: the
        # entries of its row in the (COO) count matrix, normalized.
        probs = tcounts.data[tcounts.row == startstate] / sum(
            tcounts.data[tcounts.row == startstate])
        # Draw the next state among the columns reachable from startstate.
        a['Data'][traj, step] = tcounts.col[tcounts.row == startstate][
            draw_index(probs)[0]]
        startstate = a['Data'][traj, step]
Esempio n. 21
0
    the structures of each of the cluster centers.  Produced using Cluster.py.''',
                        default='Data/Gens.lh5')

    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    if -1 in args.states:
        print "Ripping PDBs for all states"
        args.states = 'all'

    if args.conformations_per_state == -1:
        print "Getting all PDBs for each state"
        args.conformations_per_state = 'all'

    atom_indices = np.loadtxt(args.lprmsd_atom_indices, np.int)
    assignments = Serializer.LoadData(args.assignments)
    project = Project.load_from_hdf(args.project)

    if args.lprmsd_permute_atoms == 'None':
        permute_indices = None
    else:
        permute_indices = ReadPermFile(args.lprmsd_permute_atoms)

    if args.lprmsd_alt_indices == 'None':
        alt_indices = None
    else:
        alt_indices = np.loadtxt(args.lprmsd_alt_indices, np.int)

    run(project, assignments, args.conformations_per_state, args.states,
        args.output_dir, args.generators, atom_indices, permute_indices,
        alt_indices, args.total_memory_gb)
Esempio n. 22
0
                    help="Input RMSD.h5 file",
                    required=True)
parser.add_argument('-rg', '--Rg', help="Input Rgs.dat file", required=True)
parser.add_argument('-l',
                    '--Locate',
                    help="Locate states on the Rg-RMSD graph",
                    type=str)
parser.add_argument('-o',
                    '--Output',
                    help="Output file (graph) name.Default: Rg-RMSD.png",
                    default="Rg-RMSD.png")

args = parser.parse_args()

try:
    R = Serializer.LoadFromHDF(args.RMSD)
    rmsd = []
    for i in range(len(R['Data'])):
        for j in range(len(R['Data'][i])):
            if R['Data'][i, j] != -1:
                rmsd.append(R['Data'][i, j])
except IOError:
    print "Can't find RMSD.h5, please run CalculateProjectRMSD.py first to get RMSD.h5."
    raise IOError
try:
    Rgs = loadtxt(args.Rg)
    rgs = []
    for i in range(len(Rgs)):
        for j in range(len(Rgs[i])):
            if Rgs[i, j] != -1:
                rgs.append(Rgs[i, j])
Esempio n. 23
0
import os, sys
import numpy as np
from msmbuilder import Serializer, Trajectory
import matplotlib.pyplot as plt
sys.path.append("/Users/tud51931/scripts/gfzhou")
import HelixCoilTools as hct

ProjectInfo = Serializer.LoadFromHDF(
    '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/ProjectInfo.h5'
)
Counts = -1 * np.ones(
    (ProjectInfo['NumTrajs'], max(ProjectInfo['TrajLengths'])))
print Counts.shape

Savepath = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/result/NvOfTrajectory'
plt.figure()
plt.xlabel('Steps')
plt.ylabel('Nv')
plt.hold(False)
for i in range(0, 93):
    T = Trajectory.LoadFromHDF(
        '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj%d_hc.h5'
        % i)
    Hcount = hct.count_Helix(T)
    plt.title('Nv-steps of Traj%d' % i)
    plt.plot(range(len(Hcount)), Hcount, '.')
    print 'Save figure to %s/Nvoftraj%d.png' % (Savepath, i)
    plt.savefig('%s/Nvoftraj%d.png' % (Savepath, i))
    Counts[i, :len(Hcount)] = Hcount[:]

Counts_ma = np.ma.array(Counts, mask=[Counts == -1])
Esempio n. 24
0
import os, sys
import numpy as np
from msmbuilder import Serializer
from scipy import savetxt, loadtxt
import matplotlib.pyplot as plt

# Flatten every valid (!= -1) RMSD value across all trajectories.
try:
    R = Serializer.LoadFromHDF(
        '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/Data/RMSD.h5'
    )
    rmsd = []
    for i in range(len(R['Data'])):
        for j in range(len(R['Data'][i])):
            if R['Data'][i, j] != -1:
                rmsd.append(R['Data'][i, j])
except IOError:
    print "Can't find RMSD.h5, please run CalculateProjectRMSD.py first to get RMSD.h5."
    sys.exit()
# Flatten the per-frame helix counts the same way (-1 marks padding).
try:
    Nv = loadtxt(
        '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/result/numhelix_alltraj.txt'
    )
    nv = []
    for i in range(len(Nv)):
        for j in range(len(Nv[i])):
            if Nv[i, j] != -1:
                nv.append(Nv[i, j])
except IOError:
    print "Can't find numhelix_alltraj.txt, please run computeNumhelix_alltrajs.py first."
    sys.exit()
Esempio n. 25
0
def get_projectinfo(projectfile='RMSDCluster4.2/ProjectInfo.h5'):
    """Load and return the project info serializer.

    projectfile: path to ProjectInfo.h5. Previously hard-coded; same
    default, so existing no-argument callers are unchanged.
    """
    projectinfo = Serializer.LoadFromHDF(projectfile)

    return projectinfo
Esempio n. 26
0
def get_RMSD(rmsdfile='RMSDCluster4.2/Data/RMSD-pdb-gen0.h5'):
    """Return the 'Data' array of an RMSD HDF file.

    rmsdfile: path to the file. Previously hard-coded; same default, so
    existing no-argument callers are unchanged.
    """
    rmsds = Serializer.LoadFromHDF(rmsdfile)

    return rmsds['Data']
Esempio n. 27
0
import sys, os
import numpy as np
import scipy.io
from msmbuilder import arglib
from msmbuilder import Serializer
from msmbuilder import MSMLib

# Build a transition-count matrix from an assignments file.
# Usage: script.py <Assignments.h5> <lag time> <out.mtx>
Assignments = Serializer.LoadData(sys.argv[1])
NumStates = max(Assignments.flatten()) + 1
# int(): sys.argv entries are strings; the original passed the raw string
# as LagTime to GetCountMatrixFromAssignments.
LagTime = int(sys.argv[2])
Counts = MSMLib.GetCountMatrixFromAssignments(Assignments,
                                              NumStates,
                                              LagTime=LagTime,
                                              Slide=True)
scipy.io.mmwrite(sys.argv[3], Counts)
Esempio n. 28
0
from scipy import savetxt
from msmbuilder import Serializer

# Clustering parameters selecting which project directory to analyze.
cutoff = 3.0
metrics = 'rmsd'

# Resolve the project path for the chosen clustering metric.
if metrics.lower() == 'dihedral':
    Path = "/Users/tud51931/projects/MSM/msm/ff03-dihedralhybrid/"
    metrics = 'Dihedral'
elif metrics.lower() == 'rmsd':
    Path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/"
    metrics = 'RMSD'
Path = os.path.join(Path, '%sCluster%0.1f' % (metrics, cutoff))

AssignmentFile = os.path.join(Path, "Data", "Assignments.h5")
A = Serializer.LoadFromHDF(AssignmentFile)
# {'state': {'trajid': [frame indices]}} for every state.
StateAssignment = hct.get_StatesAssignments(AssignmentFiles=A)
RMSDFile = os.path.join(Path, "Data", "RMSD.h5")
RMSD = Serializer.LoadFromHDF(RMSDFile)
# Collect, per state, the RMSD value of every frame assigned to it.
rmsd_allstates = {}
for state in StateAssignment.keys():
    rmsd_singlestate = []
    for trajid in StateAssignment[state].keys():
        rmsd_singlestate += list(
            RMSD['Data'][int(trajid)][StateAssignment[state][trajid]])
    rmsd_allstates[int(state)] = rmsd_singlestate

# Build a rectangular (-1 padded) per-state RMSD container shaped like the
# original RMSD serializer.
maxstatelength = max([len(i) for i in rmsd_allstates.values()])
StateRMSDs = copy.deepcopy(RMSD)
StateRMSDs['Data'] = -1 * np.ones((len(rmsd_allstates), maxstatelength))
for state in rmsd_allstates.keys():
Esempio n. 29
0
 def save_container(filename, dtype):
     """Create and save the on-disk checkpoint container (data array,
     per-vtraj completion bitmap, and trajectory hashes)."""
     s = Serializer({'Data': np.array(minus_ones, dtype=dtype),
                     # builtin bool: 'np.bool' was removed in NumPy 1.24
                     'completed_vtrajs': np.zeros((n_vtrajs), dtype=bool),
                     'hashes': hashes})
     s.save_to_hdf(filename)
Esempio n. 30
0
Note: this uses a lag_time of 1 to get the transition counts, and uses
rate estimators that use the *dwell_times*.

The *correct* likelihood function to use for estimating the rate matrix when
the data is sampled at a discrete frequency is open for debate. This
likelihood function doesn't take into account the error in the lifetime estimates
from the discrete sampling. Other methods are currently under development.
    
Output: tCounts.mtx, K.mtx, Populations.dat,  Mapping.dat,
Assignments.Fixed.h5, tCounts.UnSym.mtx""")

    parser.add_argument('assignments')
    parser.add_argument('symmetrize', choices=['none', 'transpose', 'mle'])
    parser.add_argument('outdir')
    args = parser.parse_args()
    assignments = Serializer.LoadData(args.assignments)

    ratemtx_fn = pjoin(args.outdir, 'K.mtx')
    tcounts_fn = pjoin(args.outdir, 'tCounts.mtx')
    unsym_fn = pjoin(args.outdir, 'tCounts.UnSym.mtx')
    mapping_fn = pjoin(args.outdir, 'Mapping.dat')
    fixed_fn = pjoin(args.outdir, 'Assignments.Fixed.h5')
    pops_fn = pjoin(args.outdir, 'Populations.dat')
    if not os.path.exists(args.outdir):
        os.mkdir(args.outdir)
    outlist = [ratemtx_fn, tcounts_fn, unsym_fn, fixed_fn, pops_fn]
    for e in outlist:
        arglib.die_if_path_exists(e)

    # if lag time is not one, there's going to be a unit mispatch between
    # what you get and what you're expecting.
Esempio n. 31
0
#----------------------------
#October,17,2012
#Guangfeng Zhou
#Dr.Voelz Lab
#Room 100/102, Beury Hall
#Temple University

import sys,os
sys.path.append('/Users/tud51931/scripts/gfzhou')
from msmhcanalysis import SequenceEntropy_states
from msmbuilder import Serializer
import matplotlib.pyplot as plt
import numpy as np

# Plot per-state sequence entropy against per-state mean RMSD for the
# dihedral clustering at cutoff 5.2.
stateentropy = SequenceEntropy_states('HCstrings_states_Dihedral5.2.txt')
rmsdfile = Serializer.LoadFromHDF('StateRMSDs_DihedralCluster5.2.h5')
# Mask the -1 padding so it does not bias the per-state mean.
RMSD = np.ma.array(rmsdfile['Data'],mask=[rmsdfile['Data']==-1])
# Mean over axis 1: one mean RMSD value per state.
statermsd = RMSD.mean(1)

plt.figure()
plt.plot(statermsd,stateentropy,'.')
plt.title('StateSequecneEntropy versus StateRMSD')
plt.ylabel('StateSequenceEntropy')
plt.xlabel('StateRMSD(nm)')
plt.savefig('seqentropy_statermsd_dihedralcluster.png')
#plt.show()


Esempio n. 32
0
def RMSDprediction():
    """Compare the MSM-predicted mean-RMSD time course with the raw data.

    Uses the module-level globals (Assignments, Population, Tmatrix,
    metrics, cutoff, tau) set up in __main__, and saves the comparison
    figure to disk.
    """
    try:
        R = Serializer.LoadFromHDF(
            '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/Data/RMSD.h5'
        )
    except IOError:
        print("Can't find RMSD.h5, please run CalculateProjectRMSD.py first to get RMSD.h5.")
        sys.exit()
    RMSD = R['Data']
    StatesAsi = hct.get_StatesAssignments(Assignments)

    # Group the raw RMSD value of every assigned frame by state.
    RMSD_states = {}
    for state in StatesAsi.keys():
        for trajid in StatesAsi[state].keys():
            for frame in StatesAsi[state][trajid]:
                RMSD_states.setdefault(state, []).append(
                    RMSD[int(trajid)][int(frame)])

    states = [int(i) for i in RMSD_states.keys()]
    states.sort()
    mean_rmsd_states = []
    std_rmsd_states = []
    for state in states:
        mean_rmsd_states.append(np.mean(RMSD_states['%d' % state]))
        std_rmsd_states.append(np.std(RMSD_states['%d' % state]))
    print(mean_rmsd_states)

    # Initial population: fraction of trajectories starting in each state.
    P0 = np.zeros(len(Population))
    for data in Assignments['Data']:
        P0[data[0]] += 1
    P0 = P0 / P0.sum()

    # Propagate the population vector. BUG FIX: the original did
    # populationslist.append(P0) followed by 'P0 *= Tmatrix', mutating the
    # very object it had just appended — every row of populationslist ended
    # up identical. Rebinding (P0 = P0 * Tmatrix) preserves each snapshot.
    populationslist = []
    for k in range(140):
        populationslist.append(P0)
        P0 = P0 * Tmatrix

    RMSD_predicted = np.dot(np.array(populationslist),
                            np.array(mean_rmsd_states).reshape(-1, 1))
    print(RMSD_predicted)
    RMSD_predicted = RMSD_predicted.reshape(1, -1)[0]
    plt.figure()
    plt.plot(
        np.arange(0, 7000, 50),
        RMSD_predicted,
        'ro',
    )
    plt.hold(True)

    # Raw-data mean/std per step, ignoring the -1 padding.
    Counts_ma = np.ma.array(RMSD, mask=[RMSD == -1])
    RMSD_mean = Counts_ma.mean(0)
    RMSD_std = Counts_ma.std(0)
    print(RMSD_mean)

    plt.plot(range(len(RMSD_mean)), RMSD_mean, 'b')

    plt.title('RMSD-steps')
    plt.xlabel('Steps')
    plt.ylabel('RMSD')
    plt.legend(('RMSD_msm', 'RMSD_rawdata'), loc='upper right')
    figname = 'RMSD_prediction_%sCluster%0.1f_tau%d.png' % (metrics, cutoff,
                                                            tau)
    plt.savefig(figname)
    print("Save to %s" % figname)
Esempio n. 33
0
#def GetHCStringsforProject(ProjectInfo)

if __name__ == '__main__':
    # Analysis parameters: MSM lag time, clustering cutoff and metric.
    tau = 50
    cutoff = 4.2
    metrics = 'rmsd'

    # Resolve the project directory for the chosen clustering metric.
    if metrics.lower() == 'dihedral':
        Path = "/Users/tud51931/projects/MSM/msm/ff03-dihedralhybrid"
        metrics = 'Dihedral'
    elif metrics.lower() == 'rmsd':
        Path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter"
        metrics = 'RMSD'
    Path = os.path.join(Path, '%sCluster%0.1f' % (metrics, cutoff))

    # Module-level globals consumed by the *prediction() helpers below.
    ProjectInfo = Serializer.LoadFromHDF('%s/ProjectInfo.h5' % Path)
    Population = loadtxt('%s/lagtime%d/Populations.dat' % (Path, tau))
    Assignments = Serializer.LoadFromHDF("%s/lagtime%d/Assignments.Fixed.h5" %
                                         (Path, tau))
    Tmatrix = mmread('%s/lagtime%d/tProb.mtx' % (Path, tau))
    Gens = '%s/Data/Gens.lh5' % Path

    Nvprediction()
    EEdistanceprediction()
    RMSDprediction()
    Rgprediction()

    #barchartsforStatesEntropy()
    #SequenceEntropy_states()
    #GetRgsforGeneratorFile()
    #GetHCStringsforTrajectory(Gens)