def __init__(self, information, projectfile, populationfile,
             assignmentfile_fixed, tmatrixfile, rawdatafile):
        """
        Load all input files needed by the object.

        information: stored unchanged as self.Info.
        projectfile: HDF file loaded into self.ProjectInfo.
        populationfile: text file loaded into self.Population.
        assignmentfile_fixed: HDF file loaded into self.Assignments.
        tmatrixfile: matrix file read with mmread into self.Tmatrix.
        rawdatafile: handed to self.getrawdata().

        Any failure is reported on stdout and then re-raised unchanged.
        """
        try:
            self.Info = information
            self.ProjectInfo = Serializer.LoadFromHDF(projectfile)
            self.Population = loadtxt(populationfile)
            self.Assignments = Serializer.LoadFromHDF(assignmentfile_fixed)
            self.Tmatrix = mmread(tmatrixfile)
            # Per-state view of the assignments, built from the loaded data.
            self.StateAssignment = hct.get_StatesAssignments(
                self.Assignments)
            self.getrawdata(rawdatafile)
        except:
            # Tell the user what went wrong, but keep the original traceback.
            print("Having trouble with getting required files")
            raise
Ejemplo n.º 2
0
def CalculateProjectRg(ProjectInfo, Output, returnRgs=False):
    """
    Calculate Radius of gyration for the Project ie. all the Trajectories.
    ProjectInfo: path (str) to the ProjectInfo.h5 file.
    Output: output file (XXX.dat); it is passed through checkoutput() first.
    The Output default will be set in the scripts and it is './Rgs.dat'.
    returnRgs: when True the computed Rgs are also returned.

    Side effect: the working directory is changed to the directory that
    holds ProjectInfo.h5 (presumably because the trajectory paths stored in
    the project file are relative to it -- TODO confirm).
    Raises IOError when ProjectInfo is not a string.
    """
    Output = checkoutput(Output)

    if not isinstance(ProjectInfo, str):
        print("Please input the Path to ProjectInfo.h5")
        raise IOError("Please input the Path to ProjectInfo.h5")
    print('Calculating the Rg for each trajectory......')
    # Resolve the file to an absolute path BEFORE changing directory, and
    # load it through that absolute path: a relative ProjectInfo argument
    # would otherwise be looked up relative to the new working directory.
    ProjectInfoFile = os.path.realpath(ProjectInfo)
    os.chdir(os.path.dirname(ProjectInfoFile))
    ProjectInfo = Serializer.LoadFromHDF(ProjectInfoFile)
    Trajfiles = [
        ProjectInfo['TrajFilePath'] + ProjectInfo['TrajFileBaseName'] +
        '%d' % i + ProjectInfo['TrajFileType']
        for i in range(ProjectInfo['NumTrajs'])
    ]
    Rgs = computeRg(Trajfiles)

    print("Save data to %s" % Output)
    savetxt(Output, Rgs)
    print("Done.")
    if returnRgs:
        return Rgs
Ejemplo n.º 3
0
def get_projectinfo():
    """
    Load and return '../ProjectInfo.h5'.

    When the file cannot be read, a message is printed and the IOError is
    re-raised (previously the function fell through to the return statement
    and failed with an unrelated UnboundLocalError).
    """
    try:
        return Serializer.LoadFromHDF('../ProjectInfo.h5')
    except IOError:
        print("Can't find ProjectInfo.h5!")
        raise
Ejemplo n.º 4
0
def get_Trajectory_frame(trajid, frames, trajdir=None):
    """
    Get trajectory frames.
    Load the trajectory file trj<trajid>.lh5 and return, as a list, the
    'XYZList' entry of every frame index in frames.

    trajid: integer trajectory id used to build the file name.
    frames: iterable of frame indices.
    trajdir: directory holding the trajectory files; defaults to the
             original project location when omitted.
    """
    if trajdir is None:
        trajdir = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories"
    traj = Serializer.LoadFromHDF('%s/trj%d.lh5' % (trajdir, trajid))
    xyz = traj['XYZList']

    return [xyz[i] for i in frames]
def calculate_statepopulation_rawdata(AssignmentsFixed):
    """
    Return the normalised state populations counted from an assignments file.

    AssignmentsFixed: HDF assignments file; its 'Data' entry is read row by
    row (one row per trajectory).
    The number of states is the largest value found in 'Data' plus one;
    values outside 0..max (e.g. -1) are never counted.
    """
    data = Serializer.LoadFromHDF(AssignmentsFixed)['Data']
    statenumber = max(max(row) for row in data) + 1
    population = np.zeros(statenumber)
    for row in data:
        # Convert once per trajectory, then count every state in that list.
        assigned = row.tolist()
        for state in range(statenumber):
            population[state] += assigned.count(state)
    return population / population.sum()
Ejemplo n.º 6
0
def pseudosampling(states, assignment, numberoftrajs, frames, output):
    """
    Build pseudo trajectories for each state and save them to output.

    states: iterable of state ids.
    assignment: HDF assignments file; the program exits when it is missing.
    numberoftrajs, frames: forwarded to pseudotrajs().
    output: file name handed to SaveToHDF for every state.
    """
    try:
        assignments = Serializer.LoadFromHDF(assignment)
    except IOError:
        print("Can't find Assignment file")
        sys.exit()
    # NOTE(review): every state is saved to the same output name -- confirm
    # this is intended when more than one state is requested.
    for stateid in states:
        pseudo = pseudotrajs(numberoftrajs, stateid, assignments, frames)
        pseudo.SaveToHDF(output)
        print("Wrote:%s" % output)
    def getrawdata(self, rawdatafile):
        """
        Load raw data into self.RawData.

        The file is first read as HDF (its 'Data' entry is used); if that
        fails for any reason it is read as plain text with loadtxt. When
        both attempts fail a message is printed and the last error is
        re-raised.
        """
        try:
            self.RawData = Serializer.LoadFromHDF(rawdatafile)['Data']
        except:
            # Not usable as HDF: fall back to a plain-text table.
            try:
                self.RawData = loadtxt(rawdatafile)
            except:
                print("Can not load {}".format(rawdatafile))
                raise
Ejemplo n.º 8
0
def test():
    """
    Plot mean and standard deviation of the helix number for every state.

    Loads a fixed Assignments.h5, computes the helix numbers per state and
    saves the error-bar plot as "Numhelix_states" before showing it.
    """
    assignments = Serializer.LoadFromHDF("/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/Data/Assignments.h5")
    StatesAsi = get_StatesAssignments(assignments)
    NumHelix_states = compute_numhelix_states(StatesAsi)

    # State keys are strings; order them numerically for the x axis.
    states = sorted(int(key) for key in NumHelix_states.keys())
    mean_numhelix_states = [
        np.mean(NumHelix_states['%d' % state]) for state in states
    ]
    std_numhelix_states = [
        np.std(NumHelix_states['%d' % state]) for state in states
    ]

    plt.figure()
    plt.errorbar(states, mean_numhelix_states, std_numhelix_states)
    plt.xlabel("State ID")
    plt.ylabel("Number of Helix")
    plt.savefig("Numhelix_states")
    plt.show()
Ejemplo n.º 9
0
def get_StatesAssignments(AssignmentFiles):
    """
    Build StatesAssignments {'state':{'Trajectory':[Frames]}}

    AssignmentFiles: either a path (str) to an HDF assignments file or an
    already loaded mapping with key 'Data' and, optionally, 'TrajID'.
    Frames whose assignment is -1 are skipped. State and trajectory ids are
    stored as '%d'-formatted strings.
    When 'TrajID' is missing it is added to the mapping as 0..N-1.
    """
    assignments = AssignmentFiles
    if isinstance(assignments, str):
        assignments = Serializer.LoadFromHDF(assignments)
    grouped = {}
    try:
        for trajid, data in zip(assignments['TrajID'], assignments['Data']):
            for frame, state in enumerate(data):
                if state != -1:
                    by_traj = grouped.setdefault('%d' % state, {})
                    by_traj.setdefault('%d' % trajid, []).append(frame)
        return grouped
    except KeyError:
        # An assignments file without the key 'TrajID' is a regular
        # assignments file rather than one created for bootstrap, so the
        # key is created here (numbering the trajectories in order) and the
        # grouping is run again.
        assignments['TrajID'] = list(range(len(assignments['Data'])))
        return get_StatesAssignments(assignments)
Ejemplo n.º 10
0
def bootstrap(AssignmentsFile, numtraj, bootstrapnumber, PathtoSaveFiles):
    """
    Create bootstrap samples of an assignments file and save them.

    AssignmentsFile: HDF assignments file; its 'Data' entry is resampled.
    numtraj: number of trajectories per sample, or the string 'all' to use
             as many as the file contains. Values <= 0 end the program.
    bootstrapnumber: how many bootstrap samples to build.
    PathtoSaveFiles: forwarded to SaveBootstrapFiles().

    Every sample is an integer array initialised to -1 whose rows are filled
    from sample_with_replacement(); the second item of each drawn pair is
    recorded as that row's trajectory id.
    """
    bootstraplist, TrajID = [], []
    File = Serializer.LoadFromHDF(AssignmentsFile)
    datalist = File['Data']
    if isinstance(numtraj, str) and numtraj.lower() == 'all':
        numtraj = len(datalist)
    elif int(numtraj) <= 0:
        print("Please input valid number from 1 to %d" % len(datalist))
        sys.exit()
    else:
        numtraj = int(numtraj)

    for _ in range(bootstrapnumber):
        # dtype=int: the np.int alias was removed from recent NumPy releases
        # and always meant the builtin int.
        all_assignments = -1 * np.ones((numtraj, len(datalist[0])), dtype=int)
        trajid = []
        drawn = sample_with_replacement(datalist, numtraj)
        for row, (data, source) in enumerate(drawn):
            all_assignments[row][:] = data[:]
            trajid.append(source)
        TrajID.append(trajid)
        bootstraplist.append(all_assignments)
    SaveBootstrapFiles(bootstraplist, TrajID, PathtoSaveFiles, File,
                       AssignmentsFile)
Ejemplo n.º 11
0
def compute_numhelix_states(StatesAssignments, trajdir=None):
    """
    Collect the helix numbers of the frames assigned to every state.

    StatesAssignments: {'state': {'Trajectory': [Frames]}} as produced by
                       get_StatesAssignments().
    trajdir: directory holding the trj<id>_hc.h5 files; defaults to the
             original project location when omitted.

    Returns a dict keyed like StatesAssignments. Each value is the
    concatenation of the count_Helix() results obtained for the state's
    frames in each trajectory (count_Helix presumably returns one number
    per frame -- TODO confirm).
    """
    if trajdir is None:
        trajdir = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories'

    SA = StatesAssignments
    states = SA.keys()
    numhelix_states = {}
    for n, state in enumerate(states, 1):
        print("Compute number of helix for state %d/%d" % (n, len(states)))
        numhelix_state = []
        for trajid in SA[state].keys():
            Traj = Serializer.LoadFromHDF('%s/trj%s_hc.h5' % (trajdir, trajid))
            hcs = Traj['HCs']
            # Keep only the frames of this trajectory that belong to the state.
            selected = {'HCs': [hcs[i] for i in SA[state][trajid]]}
            numhelix_state += count_Helix(selected)
        numhelix_states[state] = numhelix_state

    return numhelix_states
Ejemplo n.º 12
0
def get_projectinfo():
    """Load and return 'RMSDCluster4.2/ProjectInfo.h5'."""
    return Serializer.LoadFromHDF('RMSDCluster4.2/ProjectInfo.h5')
Ejemplo n.º 13
0
def get_RMSD():
    """Return the 'Data' entry of 'RMSDCluster4.2/Data/RMSD-pdb-gen0.h5'."""
    return Serializer.LoadFromHDF('RMSDCluster4.2/Data/RMSD-pdb-gen0.h5')['Data']
Ejemplo n.º 14
0
#----------------------------
#October,17,2012
#Guangfeng Zhou
#Dr.Voelz Lab
#Room 100/102, Beury Hall
#Temple University

import sys,os
sys.path.append('/Users/tud51931/scripts/gfzhou')
from msmhcanalysis import SequenceEntropy_states
from msmbuilder import Serializer
import matplotlib.pyplot as plt
import numpy as np

# Sequence entropy of every state, read from the helix-coil string file.
stateentropy = SequenceEntropy_states('HCstrings_states_Dihedral5.2.txt')
rmsdfile = Serializer.LoadFromHDF('StateRMSDs_DihedralCluster5.2.h5')
# Entries equal to -1 are masked so they do not enter the per-state mean.
# The mask is passed with the same shape as the data (no extra list level).
RMSD = np.ma.array(rmsdfile['Data'], mask=(rmsdfile['Data'] == -1))
statermsd = RMSD.mean(1)

# Scatter plot: one point per state.
plt.figure()
plt.plot(statermsd, stateentropy, '.')
plt.title('StateSequenceEntropy versus StateRMSD')
plt.ylabel('StateSequenceEntropy')
plt.xlabel('StateRMSD(nm)')
plt.savefig('seqentropy_statermsd_dihedralcluster.png')
#plt.show()


Ejemplo n.º 15
0
from msmbuilder import Serializer


def draw_index(probs, n_picks=1, UseFastMethod=True):
    """
    Draw n_picks indices at random, weighted by the probabilities probs.

    probs does not need to be normalised: the random numbers are scaled by
    sum(probs) before being located in the cumulative sum.
    Returns an array of n_picks indices, or None when UseFastMethod is
    false (no other method is implemented).
    """
    if not UseFastMethod:
        return None
    cumulative = np.cumsum(probs)
    total = sum(probs)
    return np.searchsorted(cumulative, np.random.rand(n_picks) * total)


# Transition counts and the assignments they were derived from.
tcounts = mmread(
    '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/lagtime50/tCounts.UnMapped.mtx'
)
Assignment = Serializer.LoadFromHDF(
    '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/lagtime50/Assignments.Fixed.h5'
)

# Generate trajnum synthetic trajectories with as many frames as the
# assignment data has columns; unfilled entries stay at -1.
trajnum = 100
frames = Assignment['Data'].shape[1]
a = Serializer()
a['Data'] = -1 * np.ones((trajnum, frames))
for traj in range(trajnum):
    print('%d of %d Trajectories' % (traj, trajnum))
    # Every trajectory starts in state 126.
    startstate = 126
    a['Data'][traj, 0] = startstate
    for step in range(1, frames):
        # Entries of the count matrix whose row is the current state.
        fromcurrent = tcounts.row == startstate
        outcounts = tcounts.data[fromcurrent]
        probs = outcounts / sum(outcounts)
        # Pick the next state among the columns reachable from this row.
        a['Data'][traj, step] = tcounts.col[fromcurrent][draw_index(probs)[0]]
        startstate = a['Data'][traj, step]
Ejemplo n.º 16
0
import os, sys
import numpy as np
from msmbuilder import Serializer, Trajectory
import matplotlib.pyplot as plt
sys.path.append("/Users/tud51931/scripts/gfzhou")
import HelixCoilTools as hct

ProjectInfo = Serializer.LoadFromHDF(
    '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/ProjectInfo.h5'
)
# One row per trajectory, padded with -1 up to the longest trajectory.
Counts = -1 * np.ones(
    (ProjectInfo['NumTrajs'], max(ProjectInfo['TrajLengths'])))
print(Counts.shape)

Savepath = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/result/NvOfTrajectory'
Trajpath = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories'
plt.figure()
plt.xlabel('Steps')
plt.ylabel('Nv')
plt.hold(False)
# NOTE(review): the loop bound 93 is fixed, while Counts has
# ProjectInfo['NumTrajs'] rows -- confirm the two agree.
for i in range(0, 93):
    T = Trajectory.LoadFromHDF('%s/trj%d_hc.h5' % (Trajpath, i))
    Hcount = hct.count_Helix(T)
    plt.title('Nv-steps of Traj%d' % i)
    plt.plot(range(len(Hcount)), Hcount, '.')
    Figname = '%s/Nvoftraj%d.png' % (Savepath, i)
    print('Save figure to %s' % Figname)
    plt.savefig(Figname)
    # Store this trajectory's counts; the rest of the row keeps -1.
    Counts[i, :len(Hcount)] = Hcount[:]

# Mask the -1 padding.
Counts_ma = np.ma.array(Counts, mask=[Counts == -1])
Ejemplo n.º 17
0
from scipy import savetxt
from msmbuilder import Serializer

cutoff = 3.0
metrics = 'rmsd'

# Choose the project directory (and the capitalised metric name used in the
# cluster directory) from the metric.
if metrics.lower() == 'dihedral':
    Path = "/Users/tud51931/projects/MSM/msm/ff03-dihedralhybrid/"
    metrics = 'Dihedral'
elif metrics.lower() == 'rmsd':
    Path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/"
    metrics = 'RMSD'
Path = os.path.join(Path, '%sCluster%0.1f' % (metrics, cutoff))

AssignmentFile = os.path.join(Path, "Data", "Assignments.h5")
RMSDFile = os.path.join(Path, "Data", "RMSD.h5")
A = Serializer.LoadFromHDF(AssignmentFile)
StateAssignment = hct.get_StatesAssignments(AssignmentFiles=A)
RMSD = Serializer.LoadFromHDF(RMSDFile)

# For every state, gather the RMSD values of all frames assigned to it.
rmsd_allstates = {}
for state in StateAssignment.keys():
    rmsd_singlestate = []
    for trajid in StateAssignment[state].keys():
        framelist = StateAssignment[state][trajid]
        rmsd_singlestate += list(RMSD['Data'][int(trajid)][framelist])
    rmsd_allstates[int(state)] = rmsd_singlestate

# Output container: a copy of the RMSD object whose 'Data' becomes a
# (number of states) x (largest state size) table initialised to -1.
maxstatelength = max([len(values) for values in rmsd_allstates.values()])
StateRMSDs = copy.deepcopy(RMSD)
StateRMSDs['Data'] = -1 * np.ones((len(rmsd_allstates), maxstatelength))
for state in rmsd_allstates.keys():
Ejemplo n.º 18
0
import os, sys
import numpy as np
from msmbuilder import Serializer
from scipy import savetxt, loadtxt
import matplotlib.pyplot as plt

# Flatten every RMSD entry that is not -1 into the list rmsd.
try:
    R = Serializer.LoadFromHDF(
        '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/Data/RMSD.h5'
    )
    rmsd = []
    for row in R['Data']:
        for value in row:
            if value != -1:
                rmsd.append(value)
except IOError:
    print("Can't find RMSD.h5, please run CalculateProjectRMSD.py first to get RMSD.h5.")
    sys.exit()

# Same treatment for the helix numbers of all trajectories.
try:
    Nv = loadtxt(
        '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/result/numhelix_alltraj.txt'
    )
    nv = []
    for row in Nv:
        for value in row:
            if value != -1:
                nv.append(value)
except IOError:
    print("Can't find numhelix_alltraj.txt, please run computeNumhelix_alltrajs.py first.")
    sys.exit()
Ejemplo n.º 19
0
                    help="Input RMSD.h5 file",
                    required=True)
parser.add_argument('-rg', '--Rg', help="Input Rgs.dat file", required=True)
parser.add_argument('-l', '--Locate', type=str,
                    help="Locate states on the Rg-RMSD graph")
parser.add_argument('-o', '--Output', default="Rg-RMSD.png",
                    help="Output file (graph) name.Default: Rg-RMSD.png")

args = parser.parse_args()

# Flatten every RMSD entry that is not -1 into the list rmsd; a missing
# file is reported and turned into a fresh IOError.
try:
    R = Serializer.LoadFromHDF(args.RMSD)
    rmsd = []
    for row in R['Data']:
        for value in row:
            if value != -1:
                rmsd.append(value)
except IOError:
    print("Can't find RMSD.h5, please run CalculateProjectRMSD.py first to get RMSD.h5.")
    raise IOError
try:
    Rgs = loadtxt(args.Rg)
    rgs = []
    for i in range(len(Rgs)):
        for j in range(len(Rgs[i])):
            if Rgs[i, j] != -1:
                rgs.append(Rgs[i, j])
Ejemplo n.º 20
0
#def GetHCStringsforProject(ProjectInfo)

if __name__ == '__main__':
    # Model to analyse: lag time, clustering cutoff and clustering metric.
    tau = 50
    cutoff = 4.2
    metrics = 'rmsd'

    # Project directory and capitalised metric name for the chosen metric.
    if metrics.lower() == 'dihedral':
        Path = "/Users/tud51931/projects/MSM/msm/ff03-dihedralhybrid"
        metrics = 'Dihedral'
    elif metrics.lower() == 'rmsd':
        Path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter"
        metrics = 'RMSD'
    Path = os.path.join(Path, '%sCluster%0.1f' % (metrics, cutoff))
    # Directory with the files that depend on the lag time.
    LagPath = '%s/lagtime%d' % (Path, tau)

    # Module-level inputs for the prediction functions called below.
    ProjectInfo = Serializer.LoadFromHDF(Path + '/ProjectInfo.h5')
    Population = loadtxt(LagPath + '/Populations.dat')
    Assignments = Serializer.LoadFromHDF(LagPath + '/Assignments.Fixed.h5')
    Tmatrix = mmread(LagPath + '/tProb.mtx')
    Gens = Path + '/Data/Gens.lh5'

    Nvprediction()
    EEdistanceprediction()
    RMSDprediction()
    Rgprediction()

    # Further analyses, currently switched off:
    #barchartsforStatesEntropy()
    #SequenceEntropy_states()
    #GetRgsforGeneratorFile()
    #GetHCStringsforTrajectory(Gens)
        x = (t['XYZList'][i, Atom1, :] - t['XYZList'][i, Atom2, :])[0]
        x = x.tolist()
        distance.append(np.dot(x, x)**0.5)
    distance += [-1] * (LongestTrajLength - len(t['XYZList']))
    return distance


#------------MAIN---------------

# The distance is measured between atom AtomName1 of residue ResidueID1 and
# atom AtomName2 of residue ResidueID2.
AtomName1 = 'C'
ResidueID1 = 1
AtomName2 = 'N'
ResidueID2 = 23
path = '/Users/tud51931/projects/MSM/msm/ff03ERR-hybridkcenter/RMSDCluster4.0'
# Single definition of the result file so that the existence check, the log
# message and the file actually written always agree.
OutputFile = 'EndtoEndDistances.dat'
Distances = []
ProjectInfo = Serializer.LoadFromHDF('%s/ProjectInfo.h5' % path)
LongestTrajLength = max(ProjectInfo['TrajLengths'])
os.chdir(path)
# Never overwrite an existing result.
if os.path.exists(OutputFile):
    print("%s exists!" % OutputFile)
    sys.exit()
print('Calculating the eeDistance of each trajectory......')
for i in range(ProjectInfo['NumTrajs']):
    trajfile = ProjectInfo['TrajFilePath'] + ProjectInfo[
        'TrajFileBaseName'] + '%d' % i + ProjectInfo['TrajFileType']
    print('%d in %d Trajectories %s' % (i, ProjectInfo['NumTrajs'], trajfile))
    d = calculatedistance(AtomName1, ResidueID1, AtomName2, ResidueID2,
                          trajfile, LongestTrajLength)
    Distances.append(d)
print("Save data to ./%s" % OutputFile)
savetxt(OutputFile, Distances)
Ejemplo n.º 22
0
def RMSDprediction():
    """
    Plot the RMSD time course predicted by the model against the raw data.

    Uses names that are not defined in this function: Assignments,
    Population, Tmatrix, metrics, cutoff and tau. Exits the program when
    RMSD.h5 cannot be read. The figure is saved as
    'RMSD_prediction_<metrics>Cluster<cutoff>_tau<tau>.png'.
    """
    try:
        R = Serializer.LoadFromHDF(
            '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/RMSDCluster4.2/Data/RMSD.h5'
        )
    except IOError:
        print "Can't find RMSD.h5, please run CalculateProjectRMSD.py first to get RMSD.h5."
        sys.exit()
    RMSD = R['Data']
    StatesAsi = hct.get_StatesAssignments(Assignments)
    # Group the RMSD value of every assigned frame by its state.
    RMSD_states = {}
    for state in StatesAsi.keys():
        for trajid in StatesAsi[state].keys():
            for frame in StatesAsi[state][trajid]:
                RMSD_states.setdefault(state, []).append(
                    RMSD[int(trajid)][int(frame)])

    # State keys are strings; sort them numerically.
    # NOTE(review): the dot product below presumably needs this order to
    # match the state order of the population vector -- confirm that every
    # state 0..N-1 has at least one frame.
    states = [int(i) for i in RMSD_states.keys()]
    states.sort()
    mean_rmsd_states = []
    std_rmsd_states = []
    for state in states:
        mean_rmsd_states.append(np.mean(RMSD_states['%d' % state]))
        std_rmsd_states.append(np.std(RMSD_states['%d' % state]))
    #savetxt('mean_numhelix_states0.dat',mean_numhelix_states)
    #savetxt('std_numhelix_states0.dat',std_numhelix_states)
    print mean_rmsd_states

    # Initial distribution: fraction of trajectories whose first frame is
    # in each state.
    P0 = np.zeros(len(Population))
    for data in Assignments['Data']:
        P0[data[0]] += 1
    P0 = P0 / P0.sum()
    # Record the distribution, then advance it with Tmatrix, 140 times.
    # NOTE(review): the list stores P0 itself, not a copy. Whether
    # `P0 *= Tmatrix` rebinds P0 to a new array or updates it in place
    # depends on numpy/scipy operator dispatch -- confirm that the stored
    # rows really differ.
    populationslist = []
    for k in range(140):
        populationslist.append(P0)
        P0 *= Tmatrix

    # Expected RMSD per step: distribution times per-state mean RMSD.
    RMSD_predicted = np.dot(np.array(populationslist),
                            np.array(mean_rmsd_states).reshape(-1, 1))
    print RMSD_predicted
    RMSD_predicted = RMSD_predicted.reshape(1, -1)[0]
    plt.figure()
    # 140 x values (0, 50, ..., 6950), one per recorded distribution.
    plt.plot(
        np.arange(0, 7000, 50),
        RMSD_predicted,
        'ro',
    )
    plt.hold(True)

    # Raw-data curve: mean over trajectories with the -1 entries masked.
    Counts_ma = np.ma.array(RMSD, mask=[RMSD == -1])
    RMSD_mean = Counts_ma.mean(0)
    # RMSD_std is computed but not used below.
    RMSD_std = Counts_ma.std(0)
    print RMSD_mean

    plt.plot(range(len(RMSD_mean)), RMSD_mean, 'b')

    plt.title('RMSD-steps')
    plt.xlabel('Steps')
    plt.ylabel('RMSD')
    plt.legend(('RMSD_msm', 'RMSD_rawdata'), loc='upper right')
    figname = 'RMSD_prediction_%sCluster%0.1f_tau%d.png' % (metrics, cutoff,
                                                            tau)
    plt.savefig(figname)
    print "Save to %s" % figname