def create_hcstrings_states(
        Assignments, outfile='HCstrings_states.txt',
        trajfile_template='/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj%s_hc.lh5'):
    """Collect the HC strings of every state and pickle them to ``outfile``.

    The assignments are regrouped with ``hct.get_StatesAssignments`` into a
    ``state -> trajid -> frame indices`` mapping.  For each state the entries
    ``Traj['HCs'][i]`` of all its frames are concatenated into one list.

    Parameters
    ----------
    Assignments
        Passed unchanged to ``hct.get_StatesAssignments``.
    outfile : str
        Destination of the pickled ``{state: [HC string, ...]}`` dictionary.
        An existing file is first renamed to ``outfile + '.bck'``.
    trajfile_template : str
        ``%``-template that receives the trajectory id and yields the path of
        the trajectory file to load.  Defaults to the original hard-coded
        location.

    Returns
    -------
    None
    """
    SA = hct.get_StatesAssignments(Assignments)
    states = list(SA.keys())
    HCstrings_states = {}
    for n, state in enumerate(states, 1):
        print("Get HC strings for state %d/%d" % (n, len(states)))
        hcstrings = []
        for trajid in SA[state].keys():
            Traj = Trajectory.LoadFromLHDF(trajfile_template % trajid)
            hcstrings.extend([Traj['HCs'][i] for i in SA[state][trajid]])
        HCstrings_states[state] = hcstrings

    if os.path.exists(outfile):
        # Keep the previous result as a backup.  os.rename avoids handing an
        # unquoted path to a shell.
        os.rename(outfile, outfile + '.bck')
    print("Write HCstings of states into %s" % outfile)
    # Binary mode is what pickle expects; the with-block closes the file even
    # if dumping fails.
    with open(outfile, 'wb') as HCfile:
        pickle.dump(HCstrings_states, HCfile)
    print("Done.")
def __init__(self, information, projectfile, populationfile,
             assignmentfile_fixed, tmatrixfile, rawdatafile):
    """Load all input files and store them on the instance.

    Parameters
    ----------
    information
        Stored unchanged as ``self.Info``.
    projectfile
        Read with ``Serializer.LoadFromHDF`` into ``self.ProjectInfo``.
    populationfile
        Read with ``loadtxt`` into ``self.Population``.
    assignmentfile_fixed
        Read with ``Serializer.LoadFromHDF`` into ``self.Assignments``.
    tmatrixfile
        Read with ``mmread`` into ``self.Tmatrix``.
    rawdatafile
        Handed to ``self.getrawdata``.

    Raises
    ------
    Exception
        Whatever one of the loaders raises; it is re-raised unchanged after
        a short notice has been printed.
    """
    try:
        self.Info = information
        self.ProjectInfo = Serializer.LoadFromHDF(projectfile)
        self.Population = loadtxt(populationfile)
        self.Assignments = Serializer.LoadFromHDF(assignmentfile_fixed)
        self.Tmatrix = mmread(tmatrixfile)
        self.StateAssignment = hct.get_StatesAssignments(self.Assignments)
        self.getrawdata(rawdatafile)
    except Exception:
        # Only real errors get the notice; KeyboardInterrupt and SystemExit
        # pass through without the misleading "files" message.
        print("Having trouble with getting required files")
        raise
def __init__(self, information, projectfile, populationfile,
             assignmentfile_fixed, tmatrixfile, rawdatafile):
    """Load all input files and store them on the instance.

    Parameters
    ----------
    information
        Stored unchanged as ``self.Info``.
    projectfile
        Read with ``Serializer.LoadFromHDF`` into ``self.ProjectInfo``.
    populationfile
        Read with ``loadtxt`` into ``self.Population``.
    assignmentfile_fixed
        Read with ``Serializer.LoadFromHDF`` into ``self.Assignments``.
    tmatrixfile
        Read with ``mmread`` into ``self.Tmatrix``.
    rawdatafile
        Handed to ``self.getrawdata``.

    Raises
    ------
    Exception
        Whatever one of the loaders raises; it is re-raised unchanged after
        a short notice has been printed.
    """
    try:
        self.Info = information
        self.ProjectInfo = Serializer.LoadFromHDF(projectfile)
        self.Population = loadtxt(populationfile)
        self.Assignments = Serializer.LoadFromHDF(assignmentfile_fixed)
        self.Tmatrix = mmread(tmatrixfile)
        self.StateAssignment = hct.get_StatesAssignments(self.Assignments)
        self.getrawdata(rawdatafile)
    except Exception:
        # Only real errors get the notice; KeyboardInterrupt and SystemExit
        # pass through without the misleading "files" message.
        print("Having trouble with getting required files")
        raise
# Script: run hct.compute_numhelix_trajectory on every trajectory file of a
# project and save the per-frame results as one text matrix with one row per
# trajectory.
sys.path.append('/Users/tud51931/scripts/gfzhou')
import HelixCoilTools as hct

parser = argparse.ArgumentParser()
# nargs='?' makes the positional optional; without it argparse always demands
# a value and the advertised default can never apply.
parser.add_argument('project', nargs='?', default="ProjectInfo.h5",
                    help="Path to ProjectInfo.h5,default=ProjectInfo.h5")
parser.add_argument('-o', '--Output', default="Nv.dat",
                    help="Output file. default=Nv.dat")
args = parser.parse_args()

# Refuse to overwrite an existing result.  The check has to look at the file
# that will really be written, not at the default name.
if os.path.exists(args.Output):
    print("%s exists!" % args.Output)
    sys.exit()

ProjectInfo = Serializer.LoadFromHDF(args.project)
LongestTrajLength = max(ProjectInfo['TrajLengths'])
# -1 marks entries that are never filled: frames past the end of a shorter
# trajectory and whole rows of trajectories whose file is missing.
NumberOfHelix = -1 * np.ones((ProjectInfo['NumTrajs'], LongestTrajLength))

print('Calculating the Number of Helix for each trajectory......')
for i in range(ProjectInfo['NumTrajs']):
    trajfile = (ProjectInfo['TrajFilePath'] + ProjectInfo['TrajFileBaseName']
                + '%d' % i + ProjectInfo['TrajFileType'])
    if not os.path.exists(trajfile):
        continue
    print('%d in %d Trajectories %s' % (i, ProjectInfo['NumTrajs'], trajfile))
    t = Trajectory.LoadFromLHDF(trajfile)
    Nv = hct.compute_numhelix_trajectory(t)
    NumberOfHelix[i, :len(Nv)] = Nv[:]

print("Save to %s" % args.Output)
savetxt(args.Output, NumberOfHelix)
print("Done.")
'/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/ProjectInfo.h5' ) Counts = -1 * np.ones( (ProjectInfo['NumTrajs'], max(ProjectInfo['TrajLengths']))) print Counts.shape Savepath = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/result/NvOfTrajectory' plt.figure() plt.xlabel('Steps') plt.ylabel('Nv') plt.hold(False) for i in range(0, 93): T = Trajectory.LoadFromHDF( '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj%d_hc.h5' % i) Hcount = hct.count_Helix(T) plt.title('Nv-steps of Traj%d' % i) plt.plot(range(len(Hcount)), Hcount, '.') print 'Save figure to %s/Nvoftraj%d.png' % (Savepath, i) plt.savefig('%s/Nvoftraj%d.png' % (Savepath, i)) Counts[i, :len(Hcount)] = Hcount[:] Counts_ma = np.ma.array(Counts, mask=[Counts == -1]) H_mean = Counts_ma.mean(0) H_std = Counts_ma.std(0) print H_mean plt.figure() plt.plot(range(len(H_mean)), H_mean, 'b') plt.title('AverageNv-Steps Of All Trajectories') plt.xlabel('Steps')
import os, sys
import numpy as np
from msmbuilder import Trajectory
# sys.path entries are used verbatim, so '~' has to be expanded explicitly;
# otherwise the import below cannot find the module through this entry.
sys.path.append(os.path.expanduser('~/scripts/gfzhou/'))
import HelixCoilTools as hct
from scipy import savetxt
"""
This script is to get the number of helix from trajectories.
"""

datafile = "./numhelix_alltraj.txt"
# Never overwrite an existing result file.
if os.path.exists(datafile):
    print("%s already exists!" % datafile)
    print("quit.")
    sys.exit()

path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories"
# One row per trajectory index; -1 marks entries that are never filled.
# NOTE(review): the (100, 8000) shape is hard-coded - a trajectory yielding
# more than 8000 values would not fit; confirm against the data set.
numhelix_alltraj = -1 * np.ones((100, 8000), dtype=int)
for i in range(100):
    Trajfile = "%s/trj%d.lh5" % (path, i)
    if not os.path.exists(Trajfile):
        # Missing trajectory files are skipped; their row stays at -1.
        continue
    T = Trajectory.LoadFromLHDF(Trajfile)
    print("Compute number of helix for %s" % Trajfile)
    numhelix = hct.compute_numhelix_trajectory(T)
    numhelix_alltraj[i][:len(numhelix)] = numhelix[:]

print("Save data to %s" % datafile)
savetxt(datafile, numhelix_alltraj)
print("Done.")
def Rgprediction():
    """Plot the MSM-propagated mean Rg together with the raw-data mean Rg.

    Reads the module-level names ``Assignments``, ``Population``,
    ``Tmatrix``, ``metrics``, ``cutoff`` and ``tau``.  Per-frame Rg values are
    loaded from a hard-coded ``Rgs.dat``; entries equal to -1 are masked out
    of the raw-data average.  The figure is saved to
    ``Rgs_prediction_<metrics>Cluster<cutoff>_tau<tau>.png`` in the current
    directory.

    Calls ``sys.exit()`` when ``Rgs.dat`` cannot be read.  Returns None.
    """
    try:
        Rgs = loadtxt(
            '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/result/Rgs.dat')
    except IOError:
        print("Can't find Rgs.dat, please run CalculateRg.py first.")
        sys.exit()

    # Group the per-frame Rg values by state.
    StatesAsi = hct.get_StatesAssignments(Assignments)
    Rgs_states = {}
    for state in StatesAsi.keys():
        for trajid in StatesAsi[state].keys():
            for frame in StatesAsi[state][trajid]:
                Rgs_states.setdefault(state, []).append(
                    Rgs[int(trajid)][int(frame)])

    # State keys are looked up as decimal strings; order them numerically so
    # that position k of the mean vector belongs to state k-th in that order.
    states = sorted(int(i) for i in Rgs_states.keys())
    mean_rg_states = [np.mean(Rgs_states['%d' % state]) for state in states]
    print(mean_rg_states)

    # Initial distribution: histogram of the first assignment of every row of
    # Assignments['Data'], normalised to one.
    P0 = np.zeros(len(Population))
    for data in Assignments['Data']:
        P0[data[0]] += 1
    P0 = P0 / P0.sum()

    n_steps = 140
    lag = 50  # 140 * 50 = 7000, the x-range used for the plot below
    populationslist = []
    current = P0
    for _ in range(n_steps):
        populationslist.append(current)
        # Build a NEW vector every step.  Updating in place would leave every
        # list entry pointing at one and the same array.
        # NOTE(review): assumes Tmatrix is row-stochastic, so the row-vector
        # product p.T is computed as T^T.p - confirm against how it is built.
        current = np.asarray(Tmatrix.T.dot(current)).ravel()

    Rgs_predicted = np.dot(np.array(populationslist),
                           np.array(mean_rg_states).reshape(-1, 1))
    print(Rgs_predicted)
    Rgs_predicted = Rgs_predicted.reshape(1, -1)[0]

    plt.figure()
    plt.plot(np.arange(0, n_steps * lag, lag), Rgs_predicted, 'ro')
    plt.hold(True)

    # Average the raw data over trajectories, ignoring the -1 padding.
    Counts_ma = np.ma.array(Rgs, mask=(Rgs == -1))
    Rgs_mean = Counts_ma.mean(0)
    print(Rgs_mean)
    plt.plot(range(len(Rgs_mean)), Rgs_mean, 'b')
    plt.title('Rgs-steps')
    plt.xlabel('Steps')
    plt.ylabel('Rgs')
    plt.legend(('Rgs_msm', 'Rgs_rawdata'), loc='upper left')
    figname = 'Rgs_prediction_%sCluster%0.1f_tau%d.png' % (metrics, cutoff, tau)
    plt.savefig(figname)
    print("Save to %s" % figname)
def GetHCStringsforTrajectory(trajectory):
    """Print and return the HC strings computed for a trajectory.

    Parameters
    ----------
    trajectory
        Either a path (``str``), which is loaded with
        ``Trajectory.LoadFromLHDF``, or an object that is passed directly to
        ``hct.ComputeDihedralsFromTrajectory``.

    Returns
    -------
    The value produced by ``hct.ConvertDihedralsToHCStrings``.  Earlier
    versions only printed it and returned None.
    """
    if isinstance(trajectory, str):
        gens = Trajectory.LoadFromLHDF(trajectory)
    else:
        # Anything that is not a path is used as is; previously this case left
        # ``gens`` unbound.
        gens = trajectory
    dihedrals = hct.ComputeDihedralsFromTrajectory(gens)
    HCs = hct.ConvertDihedralsToHCStrings(dihedrals)
    print(HCs)
    return HCs
def Nvprediction():
    """Plot the MSM-propagated mean Nv together with the raw-data mean Nv.

    Reads the module-level names ``Assignments``, ``Population``,
    ``Tmatrix``, ``ProjectInfo``, ``metrics``, ``cutoff`` and ``tau``.
    Per-state means are read from ``mean_numhelix_states.dat``; when that
    file cannot be read they are recomputed with
    ``hct.compute_numhelix_states`` and written, with the standard
    deviations, to ``mean_numhelix_states.dat`` and
    ``std_numhelix_states.dat``.  The figure is saved to
    ``Nv_prediction_<metrics>Cluster<cutoff>_tau<tau>.png`` in the current
    directory.  Returns None.
    """
    try:
        mean_numhelix_states = loadtxt('mean_numhelix_states.dat')
    except IOError:
        StatesAsi = hct.get_StatesAssignments(Assignments)
        NumHelix_states = hct.compute_numhelix_states(StatesAsi)
        # State keys are looked up as decimal strings; order them numerically.
        states = sorted(int(i) for i in NumHelix_states.keys())
        mean_numhelix_states = [np.mean(NumHelix_states['%d' % state])
                                for state in states]
        std_numhelix_states = [np.std(NumHelix_states['%d' % state])
                               for state in states]
        savetxt('mean_numhelix_states.dat', mean_numhelix_states)
        savetxt('std_numhelix_states.dat', std_numhelix_states)

    # Initial distribution: histogram of the first assignment of every row of
    # Assignments['Data'], normalised to one.
    P0 = np.zeros(len(Population))
    for data in Assignments['Data']:
        P0[data[0]] += 1
    P0 = P0 / P0.sum()

    n_steps = 140
    lag = 50  # tau = 50, so 140*50 = 7000
    populationslist = []
    current = P0
    for _ in range(n_steps):
        populationslist.append(current)
        # Build a NEW vector every step.  Updating in place would leave every
        # list entry pointing at one and the same array.
        # NOTE(review): assumes Tmatrix is row-stochastic, so the row-vector
        # product p.T is computed as T^T.p - confirm against how it is built.
        current = np.asarray(Tmatrix.T.dot(current)).ravel()

    numhelix = np.dot(np.array(populationslist),
                      np.array(mean_numhelix_states).reshape(-1, 1))
    print(numhelix)
    numhelix = numhelix.reshape(1, -1)[0]

    plt.figure()
    plt.plot(np.arange(0, n_steps * lag, lag), numhelix, 'ro')
    plt.hold(True)

    # -1 marks frames that are never filled; they are masked out below.
    Counts = -1 * np.ones(
        (ProjectInfo['NumTrajs'], max(ProjectInfo['TrajLengths'])))
    print(Counts.shape)
    # NOTE(review): 93 trajectories are hard-coded here although Counts is
    # sized from ProjectInfo['NumTrajs'] - confirm the two agree.
    for i in range(0, 93):
        T = Trajectory.LoadFromHDF(
            '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj%d_hc.h5' % i)
        Hcount = hct.count_Helix(T)
        Counts[i, :len(Hcount)] = Hcount[:]

    Counts_ma = np.ma.array(Counts, mask=(Counts == -1))
    H_mean = Counts_ma.mean(0)
    print(H_mean)
    plt.plot(range(len(H_mean)), H_mean, 'b')
    plt.title('Nv-steps')
    plt.xlabel('Steps')
    plt.ylabel('Nv')
    plt.legend(('Nv_msm', 'Nv_rawdata'), loc='upper left')
    figname = 'Nv_prediction_%sCluster%0.1f_tau%d.png' % (metrics, cutoff, tau)
    plt.savefig(figname)
    print("Save to %s" % figname)
import os, sys
# sys.path entries are used verbatim, so '~' has to be expanded explicitly;
# otherwise the import below cannot find the module through this entry.
sys.path.append(os.path.expanduser('~/scripts/gfzhou/'))
import HelixCoilTools as hct
from msmbuilder import Trajectory
"""
This script shows how to create new trj files with hc strings.
"""

path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories"
for i in range(0, 100):
    Trajfile = "%s/trj%d.lh5" % (path, i)
    if not os.path.exists(Trajfile):
        # Indices without a trajectory file are skipped silently.
        continue
    T = Trajectory.LoadFromLHDF(Trajfile)
    hct.CreateTrajFileWithHCstrings(T)
print("Done.")
from msmbuilder import Serializer cutoff = 3.0 metrics = 'rmsd' if metrics.lower() == 'dihedral': Path = "/Users/tud51931/projects/MSM/msm/ff03-dihedralhybrid/" metrics = 'Dihedral' elif metrics.lower() == 'rmsd': Path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/" metrics = 'RMSD' Path = os.path.join(Path, '%sCluster%0.1f' % (metrics, cutoff)) AssignmentFile = os.path.join(Path, "Data", "Assignments.h5") A = Serializer.LoadFromHDF(AssignmentFile) StateAssignment = hct.get_StatesAssignments(AssignmentFiles=A) RMSDFile = os.path.join(Path, "Data", "RMSD.h5") RMSD = Serializer.LoadFromHDF(RMSDFile) rmsd_allstates = {} for state in StateAssignment.keys(): rmsd_singlestate = [] for trajid in StateAssignment[state].keys(): rmsd_singlestate += list( RMSD['Data'][int(trajid)][StateAssignment[state][trajid]]) rmsd_allstates[int(state)] = rmsd_singlestate maxstatelength = max([len(i) for i in rmsd_allstates.values()]) StateRMSDs = copy.deepcopy(RMSD) StateRMSDs['Data'] = -1 * np.ones((len(rmsd_allstates), maxstatelength)) for state in rmsd_allstates.keys(): statelength = len(rmsd_allstates[state])