# Command-line options for the autocorrelation-fitting script.
parser = OptionParser()
parser.add_option('-t', dest='traj_dir', default='./Trajectories_AA/Autocorrelations',
                  help='Directory to find the autocorrelation data. [ ./Trajectories_AA/Autocorrelations ]')
parser.add_option('-o', dest='out_FN', default='./AutoFits.dat',
                  help='Output filename [ ./AutoFits.dat ]')
parser.add_option('-p', dest='procs', default=1, type=int,
                  help='Number of processes to run. Be careful, since this is also the number of tajectories open at one time, so you could have a memory issue. [ 1 ]')
options, args = parser.parse_args()

# These imports intentionally stay after option parsing, as in the original script.
import numpy as np
from pyschwancr import dataIO, FitData
import multiprocessing as mp
import os, sys, re


def AnalyzeTraj( trajFN ):
    """Fit every column of one autocorrelation file.

    Parameters
    ----------
    trajFN : path handed to ``dataIO.readData``; the loaded array is indexed
        as ``traj[:, i]``, so it must be two-dimensional.

    Returns
    -------
    1-D ``numpy`` array with one entry per column: the reciprocal of element 0
    of ``FitData.ExponFit(...)`` for that column.
    NOTE(review): element 0 is presumably the fitted decay rate, making the
    reciprocal a timescale -- confirm against ``FitData.ExponFit``.
    """
    print("Working on %s " % trajFN)
    traj = dataIO.readData( trajFN )
    X = np.arange( len( traj ) )  # one abscissa value per row of the loaded data
    timescales = [ 1. / FitData.ExponFit( X, traj[:, i], LogSample = True )[0]
                   for i in range( traj.shape[1] ) ]
    return np.array( timescales )


# Guarded so that worker processes which re-import this module (spawn start
# method) do not try to build process pools of their own.
if __name__ == '__main__':
    trajList = dataIO.getTrajList( options.traj_dir, RegEx = r'^trj\d+\.npy' )
    if len( trajList ) == 0:
        # np.vstack([]) would otherwise fail with an unhelpful ValueError.
        sys.exit( "No trajectory files found in %s" % options.traj_dir )

    pool = mp.Pool( options.procs )
    try:
        result = pool.map_async( AnalyzeTraj, trajList )
        # One row per trajectory file, one column per fitted data column.
        sol = np.vstack( result.get() )
    finally:
        # Always release the worker processes, even if a fit raised.
        pool.close()
        pool.join()

    np.savetxt( options.out_FN, sol )
    print(sol.shape)
import numpy as np
from Emsmbuilder import metrics
from pyschwancr import dataIO
from msmbuilder import Trajectory
import os, sys, re

# Check if folder exists: refuse to write into an existing directory.
if os.path.exists( options.write_dir ):
    print("Write directory (%s) already exists! Exiting..." % options.write_dir)
    # Exit with a failure status; the site-provided exit() reported success (0).
    sys.exit( 1 )
os.makedirs( options.write_dir )

trajList = dataIO.getTrajList( options.traj_dir )

# Maps the lower-cased --scheme value onto the spelling passed to ContinuousContact.
# An unrecognised scheme raises KeyError here.
getRightCap = { 'ca' : 'CA', 'closest' : 'closest', 'closest-heavy' : 'closest-heavy' }
CC = metrics.ContinuousContact( scheme = getRightCap[ options.scheme.lower() ] )

for trjFN in trajList:
    print("Working on %s" % trjFN)
    traj = Trajectory.Trajectory.LoadFromLHDF( trjFN )
    trajOut = CC.prepare_trajectory( traj )
    # Same base name as the input file, with its extension replaced by .npy.
    outFN = os.path.splitext( os.path.basename( trjFN ) )[0] + '.npy'
    np.save( os.path.join( options.write_dir, outFN ), trajOut )
    # Drop the references before the next trajectory is loaded.
    del trajOut, traj

print("Done! Output saved to %s" % options.write_dir)
from msmbuilder import Serializer, DistanceMetric, Trajectory, Conformation from pyschwancr import dataIO import os, sys, re def AnalyzeTraj( TrajInd ): print "Working on Trajectories %s and %s" % (QTrajs[ TrajInd ], RTrajs[ TrajInd ] ) QTraj = dataIO.readData( QTrajs[ TrajInd ] ).astype(np.uint8) RTraj = Trajectory.Trajectory.LoadFromLHDF( RTrajs[ TrajInd ] ) TrajDist = DistLC.GetMultiDistance( [ RTraj['XYZList'], QTraj ], [ NatStateXYZ['XYZ'], NatStateQData ] ) return TrajDist # First create a list of the trajectories RTrajs = dataIO.getTrajList( options.XYZ_dir ) QTrajs = dataIO.getTrajList( options.Q_dir, RegEx = r'^trj\d+\.npy' ) if len( RTrajs ) != len( QTrajs ): print "Need the same number of trajectories in XYZ_Dir (%s) and Q_Dir (%s)" % ( options.XYZ_dir, options.Q_dir ) exit() metrics = [ 'rmsd', 'qnorm' ] coefficients = [ options.coef_rmsd, options.coef_qnorm ] DistLC = DistanceMetric.LinearCombination( metrics, coefficients ) NatStateXYZ = Conformation.Conformation.LoadFromPDB( options.Rnat_state ) if options.Qnat_state: NatStateQData = dataIO.readData( options.Qnat_state ) else: # Need to get dimension of the QData to generate the native state:
parser.add_option(
    '--ri',
    dest='rind_FN',
    help='Residue indices to use in calculating contacts.',
)
options, args = parser.parse_args()

import numpy as np
from Emsmbuilder import clustering, metrics, Trajectory, Serializer
from pyschwancr import dataIO
import os
import re

# Atom indices are always read from file. Residue indices are optional: when a
# file is given its values are shifted down by one, otherwise 'all' is used.
# NOTE(review): the "- 1" presumably converts 1-based indices to 0-based -- confirm.
Aind = np.loadtxt( options.aind_FN, int )
Rind = ( np.loadtxt( options.rind_FN, int ) - 1 ) if options.rind_FN else 'all'

print("Loading XYZData Trajectories ...")
TrajList = dataIO.getTrajList( options.Tdata_dir )
# Load every trajectory, keep every options.stride-th frame, then join them all.
StridedTrajs = []
for fn in TrajList:
    StridedTrajs.append( Trajectory.Trajectory.LoadFromLHDF( fn )[::options.stride] )
Traj = clustering.concatenate_trajectories( StridedTrajs )

print("Preparing metrics")
RMSD = metrics.RMSD( atomindices=Aind )
QNorm = metrics.BooleanContact( contacts=Rind, cutoff=0.6 )
# Weights for the RMSD and contact terms, in the same order as the metric list.
Coefficients = [ options.coef_rmsd, options.coef_qnorm ]
LC = metrics.Hybrid( [ RMSD, QNorm ], Coefficients )
print("Prepared metrics. Now clustering.")

KMed = clustering.HybridKMedoids(
    LC,
    Traj,
    options.numGens,
    distance_cutoff=options.cutoff,
    local_num_iters=options.k_med_iters,
)
print("Done clustering. Saving data and exiting.")

# Pull the clustering results off the clusterer object.
GenInd, Ass, Dist = KMed.generator_indices, KMed.assignments, KMed.distances
parser.add_option('--dir',dest='inDir',help='Input file directory containing files formatted as <metric>_drift_u##.npy') parser.add_option('-d',dest='metric',default='rmsd',help='Metric used. Will use this to decide the format of the drift files.') parser.add_option('-o',dest='outFN',help='Output file [ <metric>Data.h5 ]' ) options, args = parser.parse_args() import numpy as np from pyschwancr import dataIO import os, sys, re from msmbuilder import Serializer regEx = r'^%s_drift_u\d+\.npy' % options.metric.lower() BeginInd = { 'rmsd' : 12, 'qnorm' : 13, 'dihedral' : 16 }[ options.metric.lower() ] FileList = dataIO.getTrajList( options.inDir, BeginInd = BeginInd, RegEx = regEx ) Taus = [ int( fn[ BeginInd + len( options.inDir ) + 1 : -4 ] ) for fn in FileList ] Data = [ np.load( fn ).flatten() for fn in FileList ] DataAry = np.ones( ( len( Data ), max([ d.shape[0] for d in Data ]) ) ) * -1 for i in xrange( len( Data ) ): DataAry[i][:len(Data[i])] = Data[i] S = Serializer.Serializer( {'Data': DataAry, 'Taus' : Taus } ) if options.outFN: outFN = options.outFN else:
# Build the distance metric selected on the command line. options.restInd is a
# file of indices for rmsd/bool_cm/cont_cm, or a text file whose stripped
# contents are passed as the angles argument for dihedral.
metricName = options.metric.lower()
if metricName == 'rmsd':
    Dist = metrics.RMSD( atomindices = np.loadtxt( options.restInd, int ) )
elif metricName == 'bool_cm':
    Dist = metrics.BooleanContact( contacts = np.loadtxt( options.restInd, int ) )
elif metricName == 'cont_cm':
    Dist = metrics.ContinuousContact( contacts = np.loadtxt( options.restInd, int ) )
elif metricName == 'dihedral':
    # Context manager so the file handle is closed promptly.
    with open( options.restInd ) as angleFile:
        Dist = metrics.Dihedral( angles = angleFile.read().strip() )
else:
    print("Need to enter one of rmsd, bool_cm, cont_cm, or dihedral as a metric")
    # Exit with a failure status; the site-provided exit() reported success (0).
    raise SystemExit( 1 )

Proj = Project.Project.LoadFromHDF( options.proj_FN )
TrajLengths = Proj['TrajLengths']

# One row per trajectory, padded with -1 beyond each trajectory's own length.
DistOut = np.ones( ( TrajLengths.shape[0], TrajLengths.max() ) ) * -1

trajList = dataIO.getTrajList( Proj['TrajFilePath'] )

# Reference structure, converted to the metric's prepared representation.
pdb = Trajectory.Trajectory.LoadTrajectoryFile( options.pdbFN )
pdb = Dist.prepare_trajectory( pdb )
print(pdb.shape)

for i, trajFN in enumerate( trajList ):
    print("Working on %s" % trajFN)
    traj = Trajectory.Trajectory.LoadFromLHDF( trajFN )
    traj = Dist.prepare_trajectory( traj )
    print(traj.shape)
    # Distance from entry 0 of the prepared reference to every frame of this trajectory.
    DistOut[ i, : TrajLengths[i] ] = Dist.one_to_all( pdb, traj, 0 )
    # Drop the reference before the next trajectory is loaded.
    del traj

Serializer.SaveData( options.outFN, DistOut )