Exemple #1
0
# Command-line interface for the autocorrelation fitting script.
parser = OptionParser()
# Where the per-trajectory autocorrelation arrays are read from.
parser.add_option(
    '-t', dest='traj_dir', default='./Trajectories_AA/Autocorrelations',
    help='Directory to find the autocorrelation data. [ ./Trajectories_AA/Autocorrelations ]')
# Where the fitted values are written.
parser.add_option(
    '-o', dest='out_FN', default='./AutoFits.dat',
    help='Output filename [ ./AutoFits.dat ]')
# Size of the worker pool; parsed as an integer.
parser.add_option(
    '-p', dest='procs', default=1, type=int,
    help='Number of processes to run. Be careful, since this is also the number of trajectories open at one time, so you could have a memory issue. [ 1 ]')

options, args = parser.parse_args()
 
import numpy as np
from pyschwancr import dataIO, FitData
import multiprocessing as mp
import os, sys, re

def AnalyzeTraj( trajFN ):
    """Fit every column of one data file and return the reciprocal fit values.

    The file at ``trajFN`` is read with ``dataIO.readData``. Each column is
    fitted against the row index with ``FitData.ExponFit`` (``LogSample=True``)
    and the reciprocal of the first element of each fit result is collected.

    Returns a NumPy array with one entry per column of the data.
    """
    print("Working on %s " % trajFN)
    data = dataIO.readData( trajFN )
    rowIndex = np.arange( len( data ) )
    nCols = data.shape[1]
    inverseFits = [
        1. / FitData.ExponFit( rowIndex, data[:, col], LogSample = True )[0]
        for col in range( nCols )
    ]
    return np.array( inverseFits )

# Collect the files to fit. The pattern is a raw string so that the regex
# escapes (\d, \.) reach the regex engine instead of being read as string escapes.
trajList = dataIO.getTrajList( options.traj_dir, RegEx = r'^trj\d+\.npy' )

# Fan the files out over options.procs worker processes.
pool = mp.Pool( options.procs )
result = pool.map_async( AnalyzeTraj, trajList )
try:
    # get() blocks until every file is done and re-raises any worker error;
    # the per-file arrays are stacked as rows in trajList order.
    sol = np.vstack( result.get() )
finally:
    # Always release the worker processes, even if a worker failed.
    pool.close()
    pool.join()

np.savetxt( options.out_FN, sol )
print(sol.shape)
 
import numpy as np
from Emsmbuilder import metrics
from pyschwancr import dataIO
from msmbuilder import Trajectory
import os, sys, re
 
# Refuse to write into an existing output directory.
if os.path.exists( options.write_dir ):
    print("Write directory (%s) already exists! Exiting..." % options.write_dir)
    # Exit with a non-zero status so calling shells can detect the failure.
    # sys.exit is used because the bare exit() helper comes from the site
    # module and is intended for interactive sessions only.
    sys.exit( 1 )

os.makedirs( options.write_dir )

trajList = dataIO.getTrajList( options.traj_dir )

# Map the lower-cased options.scheme onto the spelling handed to
# metrics.ContinuousContact; an unknown scheme raises KeyError here.
getRightCap = { 'ca' : 'CA', 'closest' : 'closest', 'closest-heavy' : 'closest-heavy' }

CC = metrics.ContinuousContact( scheme = getRightCap[ options.scheme.lower() ] )

for trjFN in trajList:
    print("Working on %s" % trjFN)
    traj = Trajectory.Trajectory.LoadFromLHDF( trjFN )
    trajOut = CC.prepare_trajectory( traj )
    # Reuse the input base name with its extension swapped for .npy.
    # basename/splitext handle any extension length and path separator,
    # unlike split('/') plus a fixed four-character strip.
    outFN = os.path.splitext( os.path.basename( trjFN ) )[0] + '.npy'
    np.save( os.path.join( options.write_dir, outFN ), trajOut )
    # Drop both arrays before the next trajectory is loaded.
    del trajOut, traj

print("Done! Output saved to %s" % options.write_dir)
Exemple #3
0
from msmbuilder import Serializer, DistanceMetric, Trajectory, Conformation
from pyschwancr import dataIO
import os, sys, re

def AnalyzeTraj( TrajInd ):
    """Return the combined distance of one trajectory pair to the native state.

    ``TrajInd`` indexes the module-level ``QTrajs`` and ``RTrajs`` lists. The
    Q data is read with ``dataIO.readData`` and cast to ``uint8``; the
    coordinate trajectory is loaded with ``LoadFromLHDF``. Both are passed to
    ``DistLC.GetMultiDistance`` together with the native-state coordinates
    (``NatStateXYZ['XYZ']``) and ``NatStateQData``.
    """
    qFN = QTrajs[ TrajInd ]
    xyzFN = RTrajs[ TrajInd ]
    print("Working on Trajectories %s and %s" % ( qFN, xyzFN ))

    qData = dataIO.readData( qFN ).astype( np.uint8 )
    xyzTraj = Trajectory.Trajectory.LoadFromLHDF( xyzFN )

    trajectorySide = [ xyzTraj['XYZList'], qData ]
    nativeSide = [ NatStateXYZ['XYZ'], NatStateQData ]
    return DistLC.GetMultiDistance( trajectorySide, nativeSide )

# Build the two trajectory lists: coordinate trajectories from XYZ_dir and
# the matching trj<N>.npy Q-data files from Q_dir.
RTrajs = dataIO.getTrajList( options.XYZ_dir )
QTrajs = dataIO.getTrajList( options.Q_dir, RegEx = r'^trj\d+\.npy' )

# AnalyzeTraj pairs the lists by index, so they must be the same length.
if len( RTrajs ) != len( QTrajs ):
    print("Need the same number of trajectories in XYZ_Dir (%s) and Q_Dir (%s)" % ( options.XYZ_dir, options.Q_dir ))
    # Exit with a non-zero status so callers can detect the failure.
    # sys.exit replaces the interactive-only site helper exit().
    sys.exit( 1 )

# Linear combination of the two metrics, weighted by the user-supplied coefficients.
metrics = [ 'rmsd', 'qnorm' ]
coefficients = [ options.coef_rmsd, options.coef_qnorm ]
DistLC = DistanceMetric.LinearCombination( metrics, coefficients )

# Native-state structure loaded from the PDB file given in options.Rnat_state.
NatStateXYZ = Conformation.Conformation.LoadFromPDB( options.Rnat_state )
if options.Qnat_state:
	NatStateQData = dataIO.readData( options.Qnat_state )
else:
	# Need to get dimension of the QData to generate the native state:
Exemple #4
0
# Optional file of residue indices; when omitted, options.rind_FN is None.
parser.add_option(
    '--ri',
    dest='rind_FN',
    help='Residue indices to use in calculating contacts.',
)
(options, args) = parser.parse_args()

import numpy as np
from Emsmbuilder import clustering, metrics, Trajectory, Serializer
from pyschwancr import dataIO
import os
import re
# Atom indices are used exactly as read from file. Residue indices, when a
# file is given, are shifted down by one; otherwise the string 'all' is used.
Aind = np.loadtxt( options.aind_FN, int )
Rind = ( np.loadtxt( options.rind_FN, int ) - 1 ) if options.rind_FN else 'all'

print("Loading XYZData Trajectories ...")
TrajList = dataIO.getTrajList( options.Tdata_dir )
# Each trajectory is subsampled by options.stride before concatenation.
Traj = clustering.concatenate_trajectories(
    [ Trajectory.Trajectory.LoadFromLHDF( fn )[::options.stride] for fn in TrajList ]
)

print("Preparing metrics")
Coefficients = [ options.coef_rmsd, options.coef_qnorm ]
RMSD = metrics.RMSD( atomindices = Aind )
QNorm = metrics.BooleanContact( contacts = Rind, cutoff = 0.6 )
# Hybrid metric: RMSD and boolean-contact terms combined using Coefficients.
LC = metrics.Hybrid( [ RMSD, QNorm ], Coefficients )
print("Prepared metrics. Now clustering.")

KMed = clustering.HybridKMedoids(
    LC,
    Traj,
    options.numGens,
    distance_cutoff = options.cutoff,
    local_num_iters = options.k_med_iters,
)
print("Done clustering. Saving data and exiting.")
# Pull the clustering outputs off the result object.
GenInd, Ass, Dist = KMed.generator_indices, KMed.assignments, KMed.distances
# Input directory holding the per-tau drift arrays.
parser.add_option(
    '--dir',
    dest='inDir',
    help='Input file directory containing files formatted as <metric>_drift_u##.npy',
)
# Metric name; it selects the filename prefix of the drift files.
parser.add_option(
    '-d',
    dest='metric',
    default='rmsd',
    help='Metric used. Will use this to decide the format of the drift files.',
)
# Optional output filename.
parser.add_option(
    '-o',
    dest='outFN',
    help='Output file [ <metric>Data.h5 ]',
)

(options, args) = parser.parse_args()
 
import numpy as np
from pyschwancr import dataIO
import os, sys, re
from msmbuilder import Serializer

# Drift files are named <metric>_drift_u<tau>.npy.
metricName = options.metric.lower()
regEx = r'^%s_drift_u\d+\.npy' % metricName

# Length of the '<metric>_drift_u' prefix, i.e. where the tau digits start
# in the file name. An unsupported metric raises KeyError here.
BeginInd = { 'rmsd' : 12, 'qnorm' : 13, 'dihedral' : 16 }[ metricName ]

FileList = dataIO.getTrajList( options.inDir, BeginInd = BeginInd, RegEx = regEx )

# Parse tau from the base name only: drop the prefix and the '.npy' suffix.
# Slicing the full path at len(options.inDir) + 1 picks the wrong characters
# whenever inDir has a trailing slash or the returned paths are normalised.
Taus = [ int( os.path.basename( fn )[ BeginInd : -4 ] ) for fn in FileList ]

Data = [ np.load( fn ).flatten() for fn in FileList ]

# One row per file, padded with -1 up to the longest flattened array.
DataAry = np.ones( ( len( Data ), max([ d.shape[0] for d in Data ]) ) ) * -1

for i, row in enumerate( Data ):
    DataAry[i, :len( row )] = row

S = Serializer.Serializer( {'Data': DataAry, 'Taus' : Taus } )

if options.outFN:
	outFN = options.outFN
else:
# Build the distance metric named by options.metric. Each builder is
# deferred so options.restInd is only read for the metric actually chosen.
metricName = options.metric.lower()
metricBuilders = {
    'rmsd' : lambda: metrics.RMSD( atomindices = np.loadtxt( options.restInd, int ) ),
    'bool_cm' : lambda: metrics.BooleanContact( contacts = np.loadtxt( options.restInd, int ) ),
    'cont_cm' : lambda: metrics.ContinuousContact( contacts = np.loadtxt( options.restInd, int ) ),
    'dihedral' : lambda: metrics.Dihedral( angles = open( options.restInd ).read().strip() ),
}
if metricName not in metricBuilders:
    print("Need to enter one of rmsd, bool_cm, cont_cm, or dihedral as a metric")
    exit()
Dist = metricBuilders[ metricName ]()

Proj = Project.Project.LoadFromHDF( options.proj_FN )

# One row per trajectory, padded with -1 beyond each trajectory's length.
trajLengths = Proj['TrajLengths']
DistOut = -1 * np.ones( ( trajLengths.shape[0], trajLengths.max() ) )

trajList = dataIO.getTrajList( Proj['TrajFilePath'] )

# The reference structure goes through the same preparation as the trajectories.
pdb = Dist.prepare_trajectory( Trajectory.Trajectory.LoadTrajectoryFile( options.pdbFN ) )
print(pdb.shape)

for i, trajFN in enumerate( trajList ):
    print("Working on %s" % trajFN)
    prepared = Dist.prepare_trajectory( Trajectory.Trajectory.LoadFromLHDF( trajFN ) )
    print(prepared.shape)
    # Distance from frame 0 of the prepared reference to every frame of this trajectory.
    DistOut[i, : trajLengths[i] ] = Dist.one_to_all( pdb, prepared, 0 )

    del prepared

Serializer.SaveData(options.outFN, DistOut )