Beispiel #1
0
def addGauss( data ):
   if not options.weight_FN:
      print "Need weights in order to add gaussians together."
   
   means = data[:,1]
   stds = data[:,2]

   meanPlus2sigma = means + 2 * stds
   meanMinus2sigma = means - 2 * stds

   minX = min( [ 0, meanMinus2sigma.min() ] )
   maxX = meanPlus2sigma.max()
   print minX, maxX
   weights = dataIO.readData( options.weight_FN )

   weights *= sqrt( 2. * pi * stds )

   xi = linspace( minX, maxX, 100 )
   
   y = zeros( 100 )

   for i in range( data.shape[0] ):
      y += gauss( [ weights[i], means[i], stds[i] ], xi )
   
   return array( zip( xi, y ) )
Beispiel #2
0
def main():
	data = dataIO.readData(options.data_FN)
	data = prepData( data )
	maxVal = data[:,1].max()
	minX = data[:,0].min()
	maxX = data[:,0].max()
	temp = (maxX - minX) * 0.1
	p0 = [ maxVal, temp, temp, maxVal, maxX - temp, temp ]
	
	pF, success = optimize.leastsq( errFnc, p0[:], args=( data[:,0], data[:,1] ), maxfev=100000, xtol=1E-10 )
	print pF

	xFit = linspace(data[:,0].min(),data[:,0].max(),100)
	yFit = fitFnc( abs(pF), xFit )

	plot( data[:,0], data[:,1],'.',label='Data')
	plot( xFit, yFit, '-r',lw=2,label='Fit')
	legend()
	if options.out_FN:
		ttl = '.'.join( options.out_FN.split('.')[:-1] )
	else:
		ttl = '.'.join( options.data_FN.split('/')[-1].split('.')[:-1]) + 'Fit to Gaussians'

	xlabel('.'.join( options.data_FN.split('/')[-1].split('.')[:-1] ) )
	ylabel('Frequency')
	title(ttl)

	if options.out_FN:
		savefig(options.out_FN[:-4]+'.pdf')
	else:
		savefig('.'.join( options.data_FN.split('/')[-1].split('.')[:-1] ) + 'fit2gauss.pdf')

	print "Wrote parameters to %s" % dataIO.writeData( [ options.out_FN ], pF )

	return
Beispiel #3
0
def AnalyzeTraj( TrajInd ):
	print "Working on Trajectories %s and %s" % (QTrajs[ TrajInd ], RTrajs[ TrajInd ] )
	QTraj = dataIO.readData( QTrajs[ TrajInd ] ).astype(np.uint8)
	RTraj = Trajectory.Trajectory.LoadFromLHDF( RTrajs[ TrajInd ] )
	
	TrajDist = DistLC.GetMultiDistance( [ RTraj['XYZList'], QTraj ], [ NatStateXYZ['XYZ'], NatStateQData ] )

	return TrajDist
Beispiel #4
0
def AnalyzeTraj( trajFN ):
	print "Working on %s " % trajFN
	traj = dataIO.readData( trajFN )
	X = np.arange( len( traj ) )
	timescales = []
	for i in xrange( traj.shape[1] ):
		timescales.append(1. / FitData.ExponFit( X, traj[:,i], LogSample = True )[0] )

	return np.array( timescales )
Beispiel #5
0
def AnalyzeTraj( trajFN ):
	# this function analyzes a trajectory to get the drift a given distance metric
	traj = dataIO.readData( trajFN )[::options.stride]
	if options.metric.lower() == 'qnorm':
		traj = traj.astype(uint8)

	print "Working on %s" % trajFN
	drifts = []
	for i in range(len( traj )-1 ): # Start at one and end one before the last because I can't average over two values.
		drifts.append( Dist.GetDistance( traj[i], traj[i+1] ) )

	drifts = array( drifts )
	
	return drifts
Beispiel #6
0
def AnalyzeTraj(trajFN):
    # this function analyzes a trajectory to get the drift a given distance metric
    traj = dataIO.readData(trajFN)[:: options.stride]
    if options.metric.lower() == "qnorm":
        traj = traj.astype(uint8)

    print "Working on %s" % trajFN
    drifts = []
    for i in range(len(traj) - 1):  # Start at one and end one before the last because I can't average over two values.
        drifts.append(Dist.GetDistance(traj[i], traj[i + 1]))
        # print drifts[-1]

    drifts = array(drifts)
    midDrifts = (drifts[1:] + drifts[:-1]) / 2.0

    outAry = concatenate(([drifts[0]], midDrifts, [drifts[-1]]))
    return outAry
Beispiel #7
0
# First create a list of the trajectories

RTrajs = dataIO.getTrajList( options.XYZ_dir )
QTrajs = dataIO.getTrajList( options.Q_dir, RegEx = r'^trj\d+\.npy' )

if len( RTrajs ) != len( QTrajs ):
	print "Need the same number of trajectories in XYZ_Dir (%s) and Q_Dir (%s)" % ( options.XYZ_dir, options.Q_dir )
	exit()

metrics = [ 'rmsd', 'qnorm' ]
coefficients = [ options.coef_rmsd, options.coef_qnorm ]
DistLC = DistanceMetric.LinearCombination( metrics, coefficients )

NatStateXYZ = Conformation.Conformation.LoadFromPDB( options.Rnat_state )
if options.Qnat_state:
	NatStateQData = dataIO.readData( options.Qnat_state )
else:
	# Need to get dimension of the QData to generate the native state:
	Qn = dataIO.readData( QTrajs[0] ).shape[1]
	NatStateQData = np.ones( Qn ).astype(np.uint8)

TotalDists = []

for i in range( len( RTrajs ) ):
	TotalDists.extend( AnalyzeTraj( i ) )

TotalDists = np.array( TotalDists )	

np.save( options.output, TotalDists )
options, args = parser.parse_args()
 
from numpy import *
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
from pyschwancr import dataIO
import os, sys, re
 
FoldTimeList = []
UnfoldTimeList = []
X_values = []
Titles = options.data_FNs 
maxVal = 0
for fn in Titles:
	FoldTimeList.append( dataIO.readData( fn + '_FoldTimes.dat' ) )
	UnfoldTimeList.append( dataIO.readData( fn + '_UnfoldTimes.dat' ) )

	if FoldTimeList[-1].max() > maxVal:
		maxVal = FoldTimeList[-1].max()

	if UnfoldTimeList[-1].max() > maxVal:
		maxVal = UnfoldTimeList[-1].max()

	m = re.search( '.*_([-.\d]+)', fn )
	if m:
		X_values.append( float( m.group(1) ) )
	else:
		print "Filename contains no number !!!! (In the form .*_[-.\d]+)"
		exit()
def main():

	# First load all the data.
	print "Loading data ...",
	Proj = Project.Project.LoadFromHDF( options.proj_FN )
	Ass = Serializer.LoadData( options.ass_FN )
	xDat = dataIO.readData( options.x_FN )[:,0]
	yDat = dataIO.readData( options.y_FN )[:,0]
	print "Done."

	print "Reformatting raw data...",
	xFmtd = np.ones( Ass.shape ) * -1
	yFmtd = np.ones( Ass.shape ) * -1
		
	lengthSum = 0
	for i in range( len( Proj['TrajLengths'] ) ):
		#print xDat[ lengthSum : lengthSum + Proj['TrajLengths'][i] ].shape
		#print xFmtd[i][:Proj['TrajLengths'][i]].shape
		xFmtd[i,:Proj['TrajLengths'][i]] = xDat[ lengthSum : lengthSum + Proj['TrajLengths'][i] ]
		yFmtd[i,:Proj['TrajLengths'][i]] = yDat[ lengthSum : lengthSum + Proj['TrajLengths'][i] ]
		lengthSum += Proj['TrajLengths'][i]
	print "Done."

	MC_Mover, X_f, X_u, Y_f, Y_u = SetupMover(xDat, yDat)
	if options.state:
		whichState = options.state
	else:
		whichState = np.random.randint( Ass.max() + 1 )

	whichConfs = np.array( np.where( Ass == whichState ) ).T
	if whichConfs.shape[0] > 100:
		whichConfs = whichConfs[:100]
		print "Only using the first 100 conformations in the state... (Out of %d)" % ( len( np.where( Ass == whichState )[0] ) )
	# whichConfs will be used to get the pdb's for each conformation.
	Pfolds_Raw = StartRawTrajs( whichConfs, Proj )

	startingPositionsXY = np.array( zip( xFmtd[ np.where( Ass == whichState )	], yFmtd[ np.where( Ass == whichState ) ] ) )
	# These are the starting locations for doing diffusion.
	Pfolds_MC = []
	count = 0.
	for XY in startingPositionsXY:
		count += 1. / len( startingPositionsXY )
		print "Calculating Diffusion Pfolds. %.2f%% Done.\r" % ( count * 100 ),
		r0 = MC_Mover.getIndices( XY[0], XY[1] )
		Pfolds_MC.append( MC_Pfold( MC_Mover, r0, X_f, X_u, Y_f, Y_u ) )

	xDat_name = '.'.join( options.x_FN.split('/')[-1].split('.')[:-1] )
	yDat_name = '.'.join( options.y_FN.split('/')[-1].split('.')[:-1] )
	np.savetxt('MC_Pfolds_State%d_%s_%s.dat' % ( whichState, xDat_name, yDat_name ), Pfolds_MC )
	print "Saved MC_Pfolds to %s" % 'MC_Pfolds_State%d_%s_%s.dat' % ( whichState, xDat_name, yDat_name )
	np.savetxt('Raw_Pfolds_State%d.dat' % options.state, Pfolds_Raw )
	print "Saved Raw_Pfolds to %s" % 'Raw_Pfolds_State%d.dat' % whichState 

	plot( Pfolds_MC, Pfolds_Raw, '.' )
	xlabel('Diffusion Pfolds')
	ylabel('Raw Pfolds (MD)')
	title('Raw Pfolds vs Diffusion Pfolds')
	xlim([0,1])
	ylim([0,1])
	savefig('State%d_RawVsDiff_%s_%s.pdf' % ( whichState, xDat_name, yDat_name ) )
	print "Saved plot to %s" % 'State%d_RawVsDiff_%s_%s.pdf' % ( whichState, xDat_name, yDat_name )
Beispiel #10
0
)
parser.add_option("-f", dest="f_cut", type=float, help="Folded cutoff. NOTE: greater than f_cut is folded.")
parser.add_option("-u", dest="u_cut", type=float, help="Unfolded cutoff. NOTE: less than u_cut is unfolded.")
parser.add_option(
    "-o", dest="outFN", default="MC_vs_Raw_PFolds.dat", help="Output file to write data to [ ./MC_vs_Raw_PFolds.dat ]"
)
options, args = parser.parse_args()

from pyschwancr import dataIO, MonteCarlo
from numpy import *
from matplotlib.backends import backend_pdf
from matplotlib.pyplot import *
from msmbuilder import Project
import re

# Load the two coordinate data sets and the project description.
xDat = dataIO.readData(options.x_FN)
yDat = dataIO.readData(options.y_FN)
Proj = Project.Project.LoadFromHDF(options.proj_FN)
# NOTE(review): this handle is not closed in the lines shown here -- confirm it is closed later.
rawFN = open(options.raw_FN)

# Read the whole raw-Pfold file at once.
rawText = rawFN.read()

# Split the text at every occurrence of "trj".
rawLines = rawText.split("trj")

# Accumulators, empty at this point.
rawXY = []
rawPfolds = []

trajLengths = Proj["TrajLengths"]


if len(xDat.shape) > 1:
Beispiel #11
0
def main():
	# First load in all the data:
	print "Loading Data..."
	ass = Serializer.LoadData(options.ass_FN)
	tProb = mmread(options.trans_FN)
	Proj = Project.LoadFromHDF(options.proj_FN)
	rawAry = dataIO.readData(options.raw_FN)
	msmAry = dataIO.readData(options.msm_FN)

	if options.low_is_folded:
		testFcn = lambda x: x <= options.fCut
	else:
		testFcn = lambda x: x >= options.fCut
	print "Calculating the raw folded over time..."
	# Now break up the raw data by trajectory:
	sum = 0
	rawTrajs = []
	for i in range( len( Proj['TrajLengths'] ) ):
		rawTrajs.append( rawAry[ sum : sum + Proj['TrajLengths'][i] ] )
		sum += Proj['TrajLengths'][i]

	# Now calculate the steps it takes to fold for each trajectory:
	time2fold_raw = []
	for trj in rawTrajs:
		count = 0
		for frame in trj:
			if testFcn(frame):
				break
			count += 1
		time2fold_raw.append( count )	
	time2fold_raw = array( time2fold_raw )
	rawName = dataIO.writeData( [ "RAW_FractionFolded" ], time2fold_raw )
	# Now calculate the msm fraction folded (using msmTools.calcFracFold)
	x0 = zeros( tProb.shape[0] ) # This start vector is based on the first frame of all the trajectories
	for i in range( ass.shape[0] ):
		x0[ ass[i,0] ] += 1.
	x0 /= float(ass.shape[0])
	print "Defining the folded state and calculating the MSM folded over time"
	# Need to define the folded state. Will use a cutoff, but if there are no states below/above the cutoff then pick the min/max value
	Fstates = []
	for index,stateAvg in enumerate(msmAry[:,1]):
		if testFcn( stateAvg ):
			Fstates.append( index )
	Fstates = array( Fstates )

	if not Fstates.any():
		if options.low_is_folded:
			Fstates = array([ where( msmAry[:,1] == msmAry[:,1].min() ) ] )
		else:
			Fstates = array([ where( msmAry[:,1] == msmAry[:,1].max() ) ] )
	N = time2fold_raw.max() / options.lag + 1
	time2fold_msm = msmTools.calcFracFold( Fstates, tProb, x0, N = N )
	datName = dataIO.writeData( [ "MSM_FractionFolded", str(options.lag) ], time2fold_msm )
	print "Saved data to %s" % datName
	# Now plot everything
	print "Making plot ..."
	hist( time2fold_raw, bins=100, histtype='step',label="Raw Data",cumulative=True,normed=True)
	
	plot( arange( N ) * options.lag , time2fold_msm, label="MSM")
	hlines( 1.0, xmin=0, xmax=time2fold_raw.max(),color='red' )
	xlim([0,time2fold_raw.max()])
	ylim([0,1.25])
	legend()
	xlabel( 'Time (frames)' )
	ylabel( 'Fraction Folded' )
	if options.title:
		title( 'Fraction folded over time (%s)' % options.title)
	else:
		title( 'Fraction folded over time' )
	text( 0.75 * time2fold_raw.max(), 0.2, "N = %d" % ass.shape[0] )
	savefig( "FracFolded_%s_rawVsMsm.pdf" % '.'.join( options.raw_FN.split('/')[-1].split('.')[:-1] ) )
	print "Plot saved to %s" % ("FracFolded_%s_rawVsMsm.pdf" % '.'.join( options.raw_FN.split('/')[-1].split('.')[:-1] ) )	
parser.add_option('-r',dest='raw_FN',help='Raw data to plot against it\'s average')
parser.add_option('-m',dest='msm_FN',help='Average data for each state')
parser.add_option('-u',dest='stride',type=int,help='Stride to color some data with')
options, args = parser.parse_args()

from numpy import *
import matplotlib
matplotlib.use('agg')
from matplotlib.pyplot import *
from pyschwancr import dataIO
from msmbuilder import Project

print "Loading Data"
Proj = Project.Project.LoadFromHDF( options.proj_FN )
Ass = Project.Serializer.LoadData( options.ass_FN ).astype( int )
Raw = dataIO.readData( options.raw_FN )
if len( Raw.shape ) > 1:
	print "Using column 0 of raw data..."
	Raw = Raw[:,0]
Avg = dataIO.readData( options.msm_FN )

# Reformat the raw data to the assignments style:
fmtRaw = ones( Ass.shape ) * -1
lengthSum = 0
for i in range( len( Proj['TrajLengths'] ) ): 
	fmtRaw[i][:Proj['TrajLengths'][i]] = Raw[ lengthSum : lengthSum + Proj['TrajLengths'][i] ]
	lengthSum += Proj['TrajLengths'][i]
# The above may not seem necessary, since we reformat it and then make it 1D again,
#  but since there may be trimmed states, we need to remove them, This was the best way I saw to do that.

Avg1D = Avg[:,1][ Ass[ where( Ass != -1 ) ] ].flatten()
Beispiel #13
0
import numpy as np
from scipy.io import mmread
from msmbuilder import tpt
from pyschwancr import dataIO
import os, sys, re


def AnalyzeIndex(state):
    print "Working on state %d" % state
    return tpt.GetMFPTsolve([state], T, LagTime=Lag)
    # return TPT.GetMFPTFundMat( state, T, Pops, LagTime=Lag )


print "Loading data..."
T = mmread(options.T_FN)
Pops = dataIO.readData(options.pops_FN)
Lag = options.lag
outFN = options.out_FN
if options.state_FN:
    F = dataIO.readData(options.state_FN)  # If there is a list of states use it
else:  # Otherwise check the MFPTs for ALL states in T
    F = np.arange(T.shape[0])

print "Calculating MFPTs"

sol = []
for state in F:
    sol.append(AnalyzeIndex(state))

ResAry = np.array(sol).T
# This is the results array. The data will be stored as columns, so the i,j th entry will be the MFPT from state i to state j
Beispiel #14
0
options, args = parser.parse_args()

# This script will read in data about Pfolds and plot the forward committors on the y-axis and the Pfolds calculated (along with their standard deviations on the x-axis
# The two plots will be:
#  1) A plot of all data points
#  2) A plot of the avg plus StdDev

import matplotlib
matplotlib.use('Pdf')
from matplotlib.pyplot import *
from numpy import *
from pyschwancr import dataIO
import re

msm = dataIO.readData(options.msm_FN)
rawFN = open( options.raw_FN, 'r' )

# One chunk of text per 'State' marker in the raw file.
rawList = rawFN.read().split('State')
rawList.pop(0) # Drop the first chunk: it is whatever precedes the first 'State' marker.
rawData = {}

for state in rawList:
	# The first line of a chunk is skipped; every later line is one conformation.
	stateList = state.split('\n')
	stateDat = []
	for conf in stateList[1:]:
		# Pull the folded / unfolded counts out of the line.
		m = re.search("Folded\s*=\s*(\d+)\s*Unfolded\s*=\s*(\d+)",conf)
		if m:
			N_unfolded = int( m.group( 2 ) )
			N_folded = int( m.group( 1 ) )
			# Fraction of the counted outcomes that folded.
			stateDat.append( N_folded / float( N_folded + N_unfolded ) )
options, args = parser.parse_args()
 
from numpy import *
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
from pyschwancr import dataIO
import os, sys, re
from scipy.optimize import curve_fit

X_values = []
Titles = options.data_FNs 
DatList = []

for fn in Titles:
	DatList.append( dataIO.readData( fn ) )
	
	m = re.search( '.*_([-.\d]+)', fn )
	if m:
		X_values.append( float( m.group(1) ) )
	else:
		print "Filename contains no number !!!! (In the form .*_[-.\d]+)"
		exit()

print "Loaded Data."
# First make the individual plots:

figure()
for i in range( len( Titles ) ):
	plot( DatList[i], label = str( X_values[i] ) + '%' )
	
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
#from matplotlib.backends.backend_pdf import PdfPages
from scipy import interpolate
from pyschwancr import dataIO
from numpy import log10 as l10
import re

#matplotlib.rc('text',usetex=True)
	
if options.font_size != None:
	matplotlib.rcParams['font.size'] = options.font_size

print "IF THE INPUT DATA IS COMPLEX, THE PLOTS WILL ONLY BE THE REAL PART!!!"
X = dataIO.readData( options.xFN ).real
Y = dataIO.readData( options.yFN ).real

if options.xFN[-3:] == '.h5':
    X = X[np.where(X!=-1)]
if options.yFN[-3:] == '.h5':
    Y = Y[np.where(Y!=-1)]

if len( X.shape ) > 1:
	if X.shape[1] == 1:
		print "X formatted strangely... Using X[:,0]"
		X = X[:,0]
	else:
		X = X[:,1]
		print "X formatted strangely... Using X[:,1]"
Beispiel #17
0
options, args = parser.parse_args()

import numpy as np
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
from pyschwancr import dataIO
import os, sys, re
 
print "Loading Data"

Ms = []
Sts = []

for mat_fn, state_fn in zip( options.mfpt_list, options.states_list ):
   Ms.append( dataIO.readData( mat_fn ) )
   Sts.append( dataIO.readData( state_fn ) )

Ms = np.hstack( Ms )
Sts = np.concatenate( Sts ).astype(int)

Data = dataIO.readData( options.data_FN )[:,1]
cutF = options.cutF
cutU = options.cutU

if options.low_is_folded:
   isFary = Data <= cutF
   isUary = Data >= cutU
else:
   isFary = Data >= cutF
   isUary = Data <= cutU
Beispiel #18
0
#!/usr/bin/env python

from optparse import OptionParser

parser = OptionParser()
parser.add_option(
    "-d", dest="dir", help="Directory to look for data (stateAvg_Qtot.dat.Fixed, Degrees.dat, Populations.dat)"
)

options, args = parser.parse_args()

import numpy as np
from pyschwancr import dataIO
import os, sys, re

pops = dataIO.readData(os.path.join(options.dir, "Populations.dat"))
deg = dataIO.readData(os.path.join(options.dir, "Degrees.dat"))
try:
    q = dataIO.readData(os.path.join(options.dir, "stateAvg_Qtot.dat.Fixed"))[:, 1]
    isF = q > 0.4
    isU = q <= 0.4
except:
    try:
        r = dataIO.readData(os.path.join(options.dir, "stateAvg_RMSD.Fixed.dat"))[:, 1]
        isF = r < 0.4
        isU = r > 0.4
    except:
        print "Need either stateAvg_[Qtot,RMSD].Fixed.dat in the directory"
        exit()

sumDegF = int(deg[isF].sum())
Beispiel #19
0
parser.add_option('-g',dest='gamma',default=1.,type=float, help="Friction coefficient")
parser.add_option('-t',dest='temp',default=100.,type=float,help="Temperature")
parser.add_option('--dt',dest='dt',default=0.0005,type=float, help="Timestep")
parser.add_option('-n',dest='num_iter',default=100, type=int, help="Number of iterations to perform")
parser.add_option('-o',dest='out_FN',help="Output filename")
options, args = parser.parse_args()

from pyschwancr import BD, dataIO
from numpy import *
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
import random
import re

# Potential: -1.38E-23 * temperature * ln( 10 ** data ), with data read from options.pot_FN.
# NOTE(review): 1.38E-23 looks like Boltzmann's constant in J/K -- confirm the intended units.
V = - 1.38E-23 * options.temp * np.log( 10**( dataIO.readData( options.pot_FN )  ) )
V = V.T # This is needed to swap the axes
Nx, Ny = V.shape
print V.shape
# Grid spacing when the Nx ( Ny ) points span the unit interval.
dx = 1. / (Nx - 1.)
dy = 1. / (Ny - 1.)

imshow(V.T,extent=[0,1,0,1],origin='bottom',cmap='jet_r') # V has x-axis = axis 0, but need to transpose to use imshow correctly

def Force( R ):
	wallForce = 100
	# Need to get the indices to look in V:
	I_x = int( R[0] * ( Nx - 1 ) )
	I_y = int( R[1] * ( Ny - 1 ) )

	if I_x >= Nx-1:
Beispiel #20
0
import os, sys, re

def AnalyzeState( state ):
   """Return True when `state` has to be split, False otherwise.

   A state is split when its conformations are neither all at-or-below the
   cutoff nor all above it, i.e. when the number of conformations flagged in
   the module-level `belowCut` mask is neither 0 nor the size of the state.
   Uses the module-level assignments array `ass`.
   """
   print("Working on state %d" % state)
   stateInd = np.where( ass == state )
   nBelow = belowCut[ stateInd ].sum()
   nInState = stateInd[0].shape[0]
   return nBelow not in [ 0, nInState ]


# Read in the data
proj = Project.Project.LoadFromHDF( options.proj_FN )
data = dataIO.readData( options.data_FN )
ass = Serializer.LoadData( options.ass_FN )
rmsd = Serializer.LoadData( options.ass_FN + '.RMSD' ) # This could end poorly.... but I can add another parameter if need be...
gens = Trajectory.Trajectory.LoadFromLHDF( options.gens_FN )
print "Loaded the data"
if os.path.isdir( options.write_dir ):
   print "Directory exists, will write data to %s... Careful, this could cause options, since overwriting will CRASH this script" % options.write_dir
else:
   os.mkdir( options.write_dir )
   print "Made output directory (%s)" % options.write_dir

data2d = msmTools.reshapeRawData( data, proj )

maxState = ass.max()

belowCut = ( data2d <= options.cutoff )
Beispiel #21
0
)
parser.add_option("-o", dest="out_FN", default="Fold_Unfold_Times.pdf", help="Output file to write to")

options, args = parser.parse_args()
from numpy import *
from msmbuilder import Project
from pyschwancr import dataIO, msmTools
import os, sys, re
import matplotlib

matplotlib.use("pdf")
from matplotlib.pyplot import *
from scipy import optimize

Proj = Project.Project.LoadFromHDF(options.proj_FN)
Data = dataIO.readData(options.data_FN)

# First cut the flat data into one slice per trajectory.  The running offset
# is called `start` so that it does not overwrite the `sum` function brought in
# by the star imports.

Lens = Proj["TrajLengths"]

Trajs = []
start = 0
for length in Lens:
    Trajs.append(Data[start : start + length])
    start += length
Folds = []
Unfolds = []

for traj in Trajs:
    (a, b) = msmTools.calcRawFoldTime(traj, options.f_cut, options.u_cut, low_is_folded=options.low_is_folded)
Beispiel #22
0
#!/usr/bin/env python
 
from optparse import OptionParser
parser = OptionParser()
parser.add_option('-f', dest='input_FN', default='./stateAvg_RMSD.Fixed.dat', help='Input stateAvg_RMSD.dat' )
options, args = parser.parse_args()
 
from msmbuilder import Serializer
from pyschwancr import dataIO
import os, sys, re
 
msmAvg = dataIO.readData( options.input_FN )

# Column 1 is stored as the per-state mean; column 2 is squared and stored as
# the variance ( presumably it holds the standard deviation -- verify ).
# `stateVars` replaces the former name `vars`, which shadowed the builtin.
avgs = msmAvg[:,1]
stateVars = msmAvg[:,2] ** 2

s = Serializer.Serializer( { 'state_mean_rmsd': avgs, 'state_var_rmsd': stateVars } )

s.SaveToHDF( 'ClusterStats.hdf' )
def main():
	"""Pick up to four ( trajectory, frame ) pairs per grid cell and run simulations from them.

	The x / y data are converted to integer grid indices, the cells inside the
	requested intervals are scanned, and for each cell at most four matching
	frames are chosen -- preferring frames found in `dirs2check`.  The chosen
	pairs are printed and passed to `run`.
	"""
	# Need to construct the list of trajframes to use. First look at what the values are for the ones already done.

	OldTrajFrames = []
	
	for tempDir in dirs2check:
		# Entries whose names end in trj<number>_frm<number> become [ traj, frame ] integer pairs.
		tempList = [ fn for fn in os.listdir( tempDir ) if re.search( 'trj\d+_frm\d+$', fn ) ]
		tempTrajFrames = [ re.search( 'trj(\d+)_frm(\d+)$', fn ).groups() for fn in tempList ]
		tempTrajFrames = [ [ int( a ), int( b ) ] for (a,b) in tempTrajFrames ]

		OldTrajFrames.extend( tempTrajFrames )

	Xdat = dataIO.readData( options.x_dat )
	Ydat = dataIO.readData( options.y_dat )
	# The smallest gap between distinct values is used as the grid spacing.
	uniqX = unique( Xdat )
	uniqY = unique( Ydat )
	diffX = abs( uniqX[1:] - uniqX[:-1] ).min()
	diffY = abs( uniqY[1:] - uniqY[:-1] ).min()
	
	Nx = 1. / diffX
	Ny = 1. / diffY

	# Scale the data by the inverse spacing; it is truncated to integer indices below.
	Xdat = Xdat * ( Nx ) 
	Ydat = Ydat * ( Ny ) 
	
	if len( Xdat.shape ) > 1:
		Xdat = Xdat[:,0]
	if len( Ydat.shape ) > 1:
		Ydat = Ydat[:,0]

	Xdat2D = msmTools.reshapeRawData( Xdat.astype(int), Proj )
	Ydat2D = msmTools.reshapeRawData( Ydat.astype(int), Proj )
	
	# The intervals come in as "low,high" strings.
	x_interval = [ float( i ) for i in options.x_int.split(',') ]
	y_interval = [ float( i ) for i in options.y_int.split(',') ]

	# Integer cell indices covered by each interval ( upper end included ).
	x_range = arange( int( x_interval[0] * Nx ), int( x_interval[1] * Nx ) + 1 )
	y_range = arange( int( y_interval[0] * Ny ), int( y_interval[1] * Ny ) + 1 )

	#print x_range, y_range
	#print Xdat.max(), Ydat.max()
	#print OldTrajFrames
	# Now construct the list of traj frames to use in the analysis
	TrajFrames = []
	print "Finding conformations..."
	for Xi in x_range:
		for Yi in y_range:
			# All ( traj, frame ) index pairs that fall into this cell.
			whichTrajFrames = array( where( ( Xdat2D == Xi ) * ( Ydat2D == Yi) ) ).T
			if len( whichTrajFrames ) <= 4:
				# Four or fewer candidates: use them all.
				ToAddTrajFrames = [ list( i ) for i in whichTrajFrames ] 
			else:
				ToAddTrajFrames = [ list( pair ) for pair in whichTrajFrames if list( pair ) in OldTrajFrames ] 
				# The above list contains pairs for this x,y for which the simulations have already been done.
				if len( ToAddTrajFrames ) > 4:
					# More than four already done: keep a random four of them.
					ToAddTrajFrames = [ list( pair ) for pair in random.permutation( ToAddTrajFrames )[:4] ]
				# Fewer than four: top up with random, not yet chosen frames of this cell.
				while len( ToAddTrajFrames ) < 4:
					randPair = whichTrajFrames[ random.randint( len( whichTrajFrames ) ) ]
					randPair = list( randPair )
		#			print '\t', randPair, ToAddTrajFrames
					if randPair in ToAddTrajFrames:
						continue
					else:
						ToAddTrajFrames.append( randPair )
			TrajFrames.extend( ToAddTrajFrames )				
	#		print Xi, Yi, ToAddTrajFrames, [ ( Xdat2D[ tuple(pair) ], Ydat2D[ tuple(pair) ] ) for pair in ToAddTrajFrames ]

	print TrajFrames

	print "Running the simulations..."
	run( TrajFrames )
Beispiel #24
0
import matplotlib
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.pyplot import *
from pyschwancr import dataIO, msmTools
import os, sys, re

Ass = Serializer.LoadData( args.ass_FN )
Proj = Serializer.LoadFromHDF( args.proj_FN )


if (Ass.max()+1) > 100:
   print "You have %d states... This is going to be a large pdf file..." % (Ass.max()+1)

pp = PdfPages( args.out_FN )

Data = dataIO.readData( args.data_FN )

if len(Data.shape) == 1:
   Data = msmTools.reshapeRawData( Data, Proj )

x0 = 0
x1 = int(Data.max()+1)

if args.x_lbl != None:
   x_lbl = ' '.join( args.x_lbl )
else:
   x_lbl = 'Data in State'

for i in range( Ass.max() + 1 ):

   figure()
from pyschwancr import dataIO, msmTools
import os, sys, re
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import numpy as np
import warnings 


warnings.filterwarnings('ignore','Warning: overflow encountered in exp')

def f( x , a, b ):
	"""Return exp( -b * x ).

	`a` is part of the signature but does not enter the result.
	NOTE(review): an amplitude factor `a` may have been intended -- confirm.
	"""
	exponent = - b * x
	return np.exp( exponent )
print "Loading data"
proj = Serializer.Serializer.LoadFromHDF( options.proj_FN )

data = dataIO.readData( options.raw_FN )
data2d = msmTools.reshapeRawData( data, proj )

print "Calculating autocorrelations"
Autos = [ autocorrelate.fft_autocorrelate( trj[ np.where( trj != -1 ) ] ) for trj in data2d ]

print "Fitting the data to single exponentials"
Fits = [ curve_fit( f, np.arange( len( corr ) ), corr )[0] for corr in Autos ]

outName = '.'.join( options.out_FN.split('.')[:-1] )
Fits = np.array( Fits )
np.savetxt( outName + '.dat', Fits )


print "Plotting some fits"
Beispiel #26
0
if len( options.data_FNs ) != len( options.coefs ):
   print "Need to enter a coefficient for each filename! There are %d coefficients for %d filenames listed!" % ( len( options.coefs ), len( options.data_FNs ) )
   exit()

Total = []
nameList =[ 'CombinedData' ]

for i in range( len( options.data_FNs ) ):
   fn = options.data_FNs[i] 
   C = options.coefs[i]
   if C % 1: # Nonzero return from mod 1 means this is not an integer, so write as a float
      nameList.append('%.1e'%C)
   else: # It's an integer!
      nameList.append('%d'%int(C))
   nameList.append(fn)
   dat = dataIO.readData( fn )
   if len( dat.shape ) > 1:
      print "Using first column of data... re-shape the data if this doesn't work."
      dat = dat[:,0]
   if len( dat.shape ) == 0:
      dat = np.array([ dat ])

   Total.append( C * dat )


Total = np.array( Total ).sum(axis=0)

name = dataIO.writeData( nameList, Total, txt=False )

print "Wrote the combination to %s" % name
Beispiel #27
0
parser.add_option('--sr',dest='Rnat_state',help='PDB with the native state in it')
parser.add_option('--cr',dest='coef_rmsd',default=1,type=float,help='Coefficient for RMSD in the linear combination')
parser.add_option('--cq',dest='coef_qnorm',default=1,type=float,help='Coefficient for Q-Norm in the linear combination')
options, args = parser.parse_args()
 
from numpy import *
from msmbuilder import Serializer, DistanceMetric, Trajectory, Conformation
from pyschwancr import dataIO
import os, sys, re

# First load the trajectories
# Load the assignments and keep the non-negative entries as a flat array.
Ass = Serializer.LoadData( options.ass_FN ).astype(int)
Ass1d = Ass[ where( Ass >= 0 ) ].flatten()

# With a zero Q-norm coefficient the Q generators are not read; a column of
# ones with one row per state is used instead.
if options.coef_qnorm == 0:
	QGens = ones( ( Ass.max()+1, 1 ) ).astype(uint8)
else:
	QGens = dataIO.readData( options.Qgen_FN ).astype(uint8)

RGens = Trajectory.Trajectory.LoadFromLHDF( options.Rgen_FN )

# Linear combination of the two metrics with the user-supplied coefficients.
metrics = [ 'rmsd', 'qnorm' ]
coefficients = [ options.coef_rmsd, options.coef_qnorm ]


DistLC = DistanceMetric.LinearCombination( metrics, coefficients )

NatStateXYZ = Conformation.Conformation.LoadFromPDB( options.Rnat_state )

if options.Qnat_state:
	NatStateQData = dataIO.readData( options.Qnat_state )
Beispiel #28
0
parser.add_option('-c',dest='cut',type=float,help='Cutoff to use to color states')
parser.add_option('-d',dest='data_FN',help='Data to use to define folded and unfolded states')
parser.add_option('--low-is-folded',dest='low_is_folded',default=False,action='store_true',help='Pass this flag if a low value of your order parameter indicates a folded state (e.g. RMSD)')
parser.add_option('-w',dest='write_dir',default='./',help='Directory to save output to [ ./ ]')
options, args = parser.parse_args()
 
from numpy import *
import matplotlib
matplotlib.use('agg')
from matplotlib.pyplot import *
from pyschwancr import dataIO, msmTools
import os, sys, re
from scipy.io import mmread

T = mmread( options.tProb )
pops = dataIO.readData( options.pops )
data = dataIO.readData( options.data_FN )[:,1]
cut = options.cut 
print "Loaded Data."

if options.low_is_folded:
   isF = data < cut
else:
   isF = data > cut


degs = msmTools.getDegrees( T )
np.savetxt(os.path.join( options.write_dir,'Degrees.dat' ), degs)
Gs = - log( pops )
Gs = Gs - Gs.max()
plot( degs[ where( 1 - isF ) ], Gs[ where( 1 - isF ) ], 'rs', label='Unfolded States' )
Beispiel #29
0
from pyschwancr import dataIO

# First load map and check if anything was even trimmed...
M = loadtxt(options.mapFN)

if where(M==-1)[0].shape == 0:
	print "No -1 entries in %s ... This probably means nothing was trimmed. Exiting..."%options.mapFN

if options.outFN:
	outFN = options.outFN
elif options.inFN.split('.')[-1] in [ 'txt','dat' ]:
# Then we have an extension so output name should use the first part
	outFN = '.'.join( options.inFN.split('.')[:-1] ) + '.Fixed.dat'
else:
	outFN = options.inFN + '.Fixed.dat'

data = dataIO.readData( options.inFN )

outList = []

for index, line in enumerate(data):
	if M[index] >= 0:
		outList.append( line )

outList = array( outList )

if outList.dtype in [ complex, complex64, complex128 ]:
	save( outFN[:-4]+'.npy', outList )
else:
	savetxt( outFN, array(outList) )
Beispiel #30
0
options, args = parser.parse_args()
import numpy as np
from pyschwancr import dataIO
from scipy import optimize


def f(p, x):
    """Evaluate the straight line with slope p[0] and intercept p[1] at x."""
    slope, intercept = p[0], p[1]
    return slope * x + intercept


def err_f(p, x, y):
    """Return the residual f(p, x) - y."""
    predicted = f(p, x)
    return predicted - y


xDat = dataIO.readData(options.x_data)
yDat = dataIO.readData(options.y_data)

if len(xDat.shape) > 1:
    xDat = xDat[:, 0]
    print "Using first column of x-axis data"

if len(yDat.shape) > 1:
    yDat = yDat[:, 0]
    print "Using first column of x-axis data"

xMax = xDat.max()
xMin = xDat.min()
# xDat = ( xDat - xMin ) / xMax

yMax = yDat.max()