def addGauss( data ): if not options.weight_FN: print "Need weights in order to add gaussians together." means = data[:,1] stds = data[:,2] meanPlus2sigma = means + 2 * stds meanMinus2sigma = means - 2 * stds minX = min( [ 0, meanMinus2sigma.min() ] ) maxX = meanPlus2sigma.max() print minX, maxX weights = dataIO.readData( options.weight_FN ) weights *= sqrt( 2. * pi * stds ) xi = linspace( minX, maxX, 100 ) y = zeros( 100 ) for i in range( data.shape[0] ): y += gauss( [ weights[i], means[i], stds[i] ], xi ) return array( zip( xi, y ) )
def main(): data = dataIO.readData(options.data_FN) data = prepData( data ) maxVal = data[:,1].max() minX = data[:,0].min() maxX = data[:,0].max() temp = (maxX - minX) * 0.1 p0 = [ maxVal, temp, temp, maxVal, maxX - temp, temp ] pF, success = optimize.leastsq( errFnc, p0[:], args=( data[:,0], data[:,1] ), maxfev=100000, xtol=1E-10 ) print pF xFit = linspace(data[:,0].min(),data[:,0].max(),100) yFit = fitFnc( abs(pF), xFit ) plot( data[:,0], data[:,1],'.',label='Data') plot( xFit, yFit, '-r',lw=2,label='Fit') legend() if options.out_FN: ttl = '.'.join( options.out_FN.split('.')[:-1] ) else: ttl = '.'.join( options.data_FN.split('/')[-1].split('.')[:-1]) + 'Fit to Gaussians' xlabel('.'.join( options.data_FN.split('/')[-1].split('.')[:-1] ) ) ylabel('Frequency') title(ttl) if options.out_FN: savefig(options.out_FN[:-4]+'.pdf') else: savefig('.'.join( options.data_FN.split('/')[-1].split('.')[:-1] ) + 'fit2gauss.pdf') print "Wrote parameters to %s" % dataIO.writeData( [ options.out_FN ], pF ) return
def AnalyzeTraj( TrajInd ): print "Working on Trajectories %s and %s" % (QTrajs[ TrajInd ], RTrajs[ TrajInd ] ) QTraj = dataIO.readData( QTrajs[ TrajInd ] ).astype(np.uint8) RTraj = Trajectory.Trajectory.LoadFromLHDF( RTrajs[ TrajInd ] ) TrajDist = DistLC.GetMultiDistance( [ RTraj['XYZList'], QTraj ], [ NatStateXYZ['XYZ'], NatStateQData ] ) return TrajDist
def AnalyzeTraj( trajFN ): print "Working on %s " % trajFN traj = dataIO.readData( trajFN ) X = np.arange( len( traj ) ) timescales = [] for i in xrange( traj.shape[1] ): timescales.append(1. / FitData.ExponFit( X, traj[:,i], LogSample = True )[0] ) return np.array( timescales )
def AnalyzeTraj( trajFN ): # this function analyzes a trajectory to get the drift a given distance metric traj = dataIO.readData( trajFN )[::options.stride] if options.metric.lower() == 'qnorm': traj = traj.astype(uint8) print "Working on %s" % trajFN drifts = [] for i in range(len( traj )-1 ): # Start at one and end one before the last because I can't average over two values. drifts.append( Dist.GetDistance( traj[i], traj[i+1] ) ) drifts = array( drifts ) return drifts
def AnalyzeTraj(trajFN): # this function analyzes a trajectory to get the drift a given distance metric traj = dataIO.readData(trajFN)[:: options.stride] if options.metric.lower() == "qnorm": traj = traj.astype(uint8) print "Working on %s" % trajFN drifts = [] for i in range(len(traj) - 1): # Start at one and end one before the last because I can't average over two values. drifts.append(Dist.GetDistance(traj[i], traj[i + 1])) # print drifts[-1] drifts = array(drifts) midDrifts = (drifts[1:] + drifts[:-1]) / 2.0 outAry = concatenate(([drifts[0]], midDrifts, [drifts[-1]])) return outAry
# First create a list of the trajectories RTrajs = dataIO.getTrajList( options.XYZ_dir ) QTrajs = dataIO.getTrajList( options.Q_dir, RegEx = r'^trj\d+\.npy' ) if len( RTrajs ) != len( QTrajs ): print "Need the same number of trajectories in XYZ_Dir (%s) and Q_Dir (%s)" % ( options.XYZ_dir, options.Q_dir ) exit() metrics = [ 'rmsd', 'qnorm' ] coefficients = [ options.coef_rmsd, options.coef_qnorm ] DistLC = DistanceMetric.LinearCombination( metrics, coefficients ) NatStateXYZ = Conformation.Conformation.LoadFromPDB( options.Rnat_state ) if options.Qnat_state: NatStateQData = dataIO.readData( options.Qnat_state ) else: # Need to get dimension of the QData to generate the native state: Qn = dataIO.readData( QTrajs[0] ).shape[1] NatStateQData = np.ones( Qn ).astype(np.uint8) TotalDists = [] for i in range( len( RTrajs ) ): TotalDists.extend( AnalyzeTraj( i ) ) TotalDists = np.array( TotalDists ) np.save( options.output, TotalDists )
options, args = parser.parse_args() from numpy import * import matplotlib matplotlib.use('pdf') from matplotlib.pyplot import * from pyschwancr import dataIO import os, sys, re FoldTimeList = [] UnfoldTimeList = [] X_values = [] Titles = options.data_FNs maxVal = 0 for fn in Titles: FoldTimeList.append( dataIO.readData( fn + '_FoldTimes.dat' ) ) UnfoldTimeList.append( dataIO.readData( fn + '_UnfoldTimes.dat' ) ) if FoldTimeList[-1].max() > maxVal: maxVal = FoldTimeList[-1].max() if UnfoldTimeList[-1].max() > maxVal: maxVal = UnfoldTimeList[-1].max() m = re.search( '.*_([-.\d]+)', fn ) if m: X_values.append( float( m.group(1) ) ) else: print "Filename contains no number !!!! (In the form .*_[-.\d]+)" exit()
def main(): # First load all the data. print "Loading data ...", Proj = Project.Project.LoadFromHDF( options.proj_FN ) Ass = Serializer.LoadData( options.ass_FN ) xDat = dataIO.readData( options.x_FN )[:,0] yDat = dataIO.readData( options.y_FN )[:,0] print "Done." print "Reformatting raw data...", xFmtd = np.ones( Ass.shape ) * -1 yFmtd = np.ones( Ass.shape ) * -1 lengthSum = 0 for i in range( len( Proj['TrajLengths'] ) ): #print xDat[ lengthSum : lengthSum + Proj['TrajLengths'][i] ].shape #print xFmtd[i][:Proj['TrajLengths'][i]].shape xFmtd[i,:Proj['TrajLengths'][i]] = xDat[ lengthSum : lengthSum + Proj['TrajLengths'][i] ] yFmtd[i,:Proj['TrajLengths'][i]] = yDat[ lengthSum : lengthSum + Proj['TrajLengths'][i] ] lengthSum += Proj['TrajLengths'][i] print "Done." MC_Mover, X_f, X_u, Y_f, Y_u = SetupMover(xDat, yDat) if options.state: whichState = options.state else: whichState = np.random.randint( Ass.max() + 1 ) whichConfs = np.array( np.where( Ass == whichState ) ).T if whichConfs.shape[0] > 100: whichConfs = whichConfs[:100] print "Only using the first 100 conformations in the state... (Out of %d)" % ( len( np.where( Ass == whichState )[0] ) ) # whichConfs will be used to get the pdb's for each conformation. Pfolds_Raw = StartRawTrajs( whichConfs, Proj ) startingPositionsXY = np.array( zip( xFmtd[ np.where( Ass == whichState ) ], yFmtd[ np.where( Ass == whichState ) ] ) ) # These are the starting locations for doing diffusion. Pfolds_MC = [] count = 0. for XY in startingPositionsXY: count += 1. / len( startingPositionsXY ) print "Calculating Diffusion Pfolds. 
%.2f%% Done.\r" % ( count * 100 ), r0 = MC_Mover.getIndices( XY[0], XY[1] ) Pfolds_MC.append( MC_Pfold( MC_Mover, r0, X_f, X_u, Y_f, Y_u ) ) xDat_name = '.'.join( options.x_FN.split('/')[-1].split('.')[:-1] ) yDat_name = '.'.join( options.y_FN.split('/')[-1].split('.')[:-1] ) np.savetxt('MC_Pfolds_State%d_%s_%s.dat' % ( whichState, xDat_name, yDat_name ), Pfolds_MC ) print "Saved MC_Pfolds to %s" % 'MC_Pfolds_State%d_%s_%s.dat' % ( whichState, xDat_name, yDat_name ) np.savetxt('Raw_Pfolds_State%d.dat' % options.state, Pfolds_Raw ) print "Saved Raw_Pfolds to %s" % 'Raw_Pfolds_State%d.dat' % whichState plot( Pfolds_MC, Pfolds_Raw, '.' ) xlabel('Diffusion Pfolds') ylabel('Raw Pfolds (MD)') title('Raw Pfolds vs Diffusion Pfolds') xlim([0,1]) ylim([0,1]) savefig('State%d_RawVsDiff_%s_%s.pdf' % ( whichState, xDat_name, yDat_name ) ) print "Saved plot to %s" % 'State%d_RawVsDiff_%s_%s.pdf' % ( whichState, xDat_name, yDat_name )
) parser.add_option("-f", dest="f_cut", type=float, help="Folded cutoff. NOTE: greater than f_cut is folded.") parser.add_option("-u", dest="u_cut", type=float, help="Unfolded cutoff. NOTE: less than u_cut is unfolded.") parser.add_option( "-o", dest="outFN", default="MC_vs_Raw_PFolds.dat", help="Output file to write data to [ ./MC_vs_Raw_PFolds.dat ]" ) options, args = parser.parse_args() from pyschwancr import dataIO, MonteCarlo from numpy import * from matplotlib.backends import backend_pdf from matplotlib.pyplot import * from msmbuilder import Project import re xDat = dataIO.readData(options.x_FN) yDat = dataIO.readData(options.y_FN) Proj = Project.Project.LoadFromHDF(options.proj_FN) rawFN = open(options.raw_FN) rawText = rawFN.read() rawLines = rawText.split("trj") rawXY = [] rawPfolds = [] trajLengths = Proj["TrajLengths"] if len(xDat.shape) > 1:
def main(): # First load in all the data: print "Loading Data..." ass = Serializer.LoadData(options.ass_FN) tProb = mmread(options.trans_FN) Proj = Project.LoadFromHDF(options.proj_FN) rawAry = dataIO.readData(options.raw_FN) msmAry = dataIO.readData(options.msm_FN) if options.low_is_folded: testFcn = lambda x: x <= options.fCut else: testFcn = lambda x: x >= options.fCut print "Calculating the raw folded over time..." # Now break up the raw data by trajectory: sum = 0 rawTrajs = [] for i in range( len( Proj['TrajLengths'] ) ): rawTrajs.append( rawAry[ sum : sum + Proj['TrajLengths'][i] ] ) sum += Proj['TrajLengths'][i] # Now calculate the steps it takes to fold for each trajectory: time2fold_raw = [] for trj in rawTrajs: count = 0 for frame in trj: if testFcn(frame): break count += 1 time2fold_raw.append( count ) time2fold_raw = array( time2fold_raw ) rawName = dataIO.writeData( [ "RAW_FractionFolded" ], time2fold_raw ) # Now calculate the msm fraction folded (using msmTools.calcFracFold) x0 = zeros( tProb.shape[0] ) # This start vector is based on the first frame of all the trajectories for i in range( ass.shape[0] ): x0[ ass[i,0] ] += 1. x0 /= float(ass.shape[0]) print "Defining the folded state and calculating the MSM folded over time" # Need to define the folded state. Will use a cutoff, but if there are no states below/above the cutoff then pick the min/max value Fstates = [] for index,stateAvg in enumerate(msmAry[:,1]): if testFcn( stateAvg ): Fstates.append( index ) Fstates = array( Fstates ) if not Fstates.any(): if options.low_is_folded: Fstates = array([ where( msmAry[:,1] == msmAry[:,1].min() ) ] ) else: Fstates = array([ where( msmAry[:,1] == msmAry[:,1].max() ) ] ) N = time2fold_raw.max() / options.lag + 1 time2fold_msm = msmTools.calcFracFold( Fstates, tProb, x0, N = N ) datName = dataIO.writeData( [ "MSM_FractionFolded", str(options.lag) ], time2fold_msm ) print "Saved data to %s" % datName # Now plot everything print "Making plot ..." 
hist( time2fold_raw, bins=100, histtype='step',label="Raw Data",cumulative=True,normed=True) plot( arange( N ) * options.lag , time2fold_msm, label="MSM") hlines( 1.0, xmin=0, xmax=time2fold_raw.max(),color='red' ) xlim([0,time2fold_raw.max()]) ylim([0,1.25]) legend() xlabel( 'Time (frames)' ) ylabel( 'Fraction Folded' ) if options.title: title( 'Fraction folded over time (%s)' % options.title) else: title( 'Fraction folded over time' ) text( 0.75 * time2fold_raw.max(), 0.2, "N = %d" % ass.shape[0] ) savefig( "FracFolded_%s_rawVsMsm.pdf" % '.'.join( options.raw_FN.split('/')[-1].split('.')[:-1] ) ) print "Plot saved to %s" % ("FracFolded_%s_rawVsMsm.pdf" % '.'.join( options.raw_FN.split('/')[-1].split('.')[:-1] ) )
parser.add_option('-r',dest='raw_FN',help='Raw data to plot against it\'s average') parser.add_option('-m',dest='msm_FN',help='Average data for each state') parser.add_option('-u',dest='stride',type=int,help='Stride to color some data with') options, args = parser.parse_args() from numpy import * import matplotlib matplotlib.use('agg') from matplotlib.pyplot import * from pyschwancr import dataIO from msmbuilder import Project print "Loading Data" Proj = Project.Project.LoadFromHDF( options.proj_FN ) Ass = Project.Serializer.LoadData( options.ass_FN ).astype( int ) Raw = dataIO.readData( options.raw_FN ) if len( Raw.shape ) > 1: print "Using column 0 of raw data..." Raw = Raw[:,0] Avg = dataIO.readData( options.msm_FN ) # Reformat the raw data to the assignments style: fmtRaw = ones( Ass.shape ) * -1 lengthSum = 0 for i in range( len( Proj['TrajLengths'] ) ): fmtRaw[i][:Proj['TrajLengths'][i]] = Raw[ lengthSum : lengthSum + Proj['TrajLengths'][i] ] lengthSum += Proj['TrajLengths'][i] # The above may not seem necessary, since we reformat it and then make it 1D again, # but since there may be trimmed states, we need to remove them, This was the best way I saw to do that. Avg1D = Avg[:,1][ Ass[ where( Ass != -1 ) ] ].flatten()
import numpy as np from scipy.io import mmread from msmbuilder import tpt from pyschwancr import dataIO import os, sys, re def AnalyzeIndex(state): print "Working on state %d" % state return tpt.GetMFPTsolve([state], T, LagTime=Lag) # return TPT.GetMFPTFundMat( state, T, Pops, LagTime=Lag ) print "Loading data..." T = mmread(options.T_FN) Pops = dataIO.readData(options.pops_FN) Lag = options.lag outFN = options.out_FN if options.state_FN: F = dataIO.readData(options.state_FN) # If there is a list of states use it else: # Otherwise check the MFPTs for ALL states in T F = np.arange(T.shape[0]) print "Calculating MFPTs" sol = [] for state in F: sol.append(AnalyzeIndex(state)) ResAry = np.array(sol).T # This is the results array. The data will be stored as columns, so the i,j th entry will be the MFPT from state i to state j
options, args = parser.parse_args()

# This script will read in data about Pfolds and plot the forward committors
# on the y-axis and the Pfolds calculated (along with their standard
# deviations) on the x-axis.
# The two plots will be:
#   1) A plot of all data points
#   2) A plot of the avg plus StdDev

import matplotlib
matplotlib.use('Pdf')
from matplotlib.pyplot import *
from numpy import *
from pyschwancr import dataIO
import re

msm = dataIO.readData(options.msm_FN)

rawFN = open( options.raw_FN, 'r' )
rawList = rawFN.read().split('State')
rawList.pop(0)   # There is an initial '' since it splits at the first characters.

rawData = {}
for state in rawList:
    stateList = state.split('\n')
    stateDat = []
    # The first line of each chunk is skipped; every later line that reports
    # folded/unfolded counts contributes its folded fraction.
    for conf in stateList[1:]:
        m = re.search("Folded\s*=\s*(\d+)\s*Unfolded\s*=\s*(\d+)",conf)
        if not m:
            continue
        N_unfolded = int( m.group( 2 ) )
        N_folded = int( m.group( 1 ) )
        stateDat.append( N_folded / float( N_folded + N_unfolded ) )
options, args = parser.parse_args() from numpy import * import matplotlib matplotlib.use('pdf') from matplotlib.pyplot import * from pyschwancr import dataIO import os, sys, re from scipy.optimize import curve_fit X_values = [] Titles = options.data_FNs DatList = [] for fn in Titles: DatList.append( dataIO.readData( fn ) ) m = re.search( '.*_([-.\d]+)', fn ) if m: X_values.append( float( m.group(1) ) ) else: print "Filename contains no number !!!! (In the form .*_[-.\d]+)" exit() print "Loaded Data." # First make the individual plots: figure() for i in range( len( Titles ) ): plot( DatList[i], label = str( X_values[i] ) + '%' )
import matplotlib matplotlib.use('pdf') from matplotlib.pyplot import * #from matplotlib.backends.backend_pdf import PdfPages from scipy import interpolate from pyschwancr import dataIO from numpy import log10 as l10 import re #matplotlib.rc('text',usetex=True) if options.font_size != None: matplotlib.rcParams['font.size'] = options.font_size print "IF THE INPUT DATA IS COMPLEX, THE PLOTS WILL ONLY BE THE REAL PART!!!" X = dataIO.readData( options.xFN ).real Y = dataIO.readData( options.yFN ).real if options.xFN[-3:] == '.h5': X = X[np.where(X!=-1)] if options.yFN[-3:] == '.h5': Y = Y[np.where(Y!=-1)] if len( X.shape ) > 1: if X.shape[1] == 1: print "X formatted strangely... Using X[:,0]" X = X[:,0] else: X = X[:,1] print "X formatted strangely... Using X[:,1]"
options, args = parser.parse_args() import numpy as np import matplotlib matplotlib.use('pdf') from matplotlib.pyplot import * from pyschwancr import dataIO import os, sys, re print "Loading Data" Ms = [] Sts = [] for mat_fn, state_fn in zip( options.mfpt_list, options.states_list ): Ms.append( dataIO.readData( mat_fn ) ) Sts.append( dataIO.readData( state_fn ) ) Ms = np.hstack( Ms ) Sts = np.concatenate( Sts ).astype(int) Data = dataIO.readData( options.data_FN )[:,1] cutF = options.cutF cutU = options.cutU if options.low_is_folded: isFary = Data <= cutF isUary = Data >= cutU else: isFary = Data >= cutF isUary = Data <= cutU
#!/usr/bin/env python from optparse import OptionParser parser = OptionParser() parser.add_option( "-d", dest="dir", help="Directory to look for data (stateAvg_Qtot.dat.Fixed, Degrees.dat, Populations.dat)" ) options, args = parser.parse_args() import numpy as np from pyschwancr import dataIO import os, sys, re pops = dataIO.readData(os.path.join(options.dir, "Populations.dat")) deg = dataIO.readData(os.path.join(options.dir, "Degrees.dat")) try: q = dataIO.readData(os.path.join(options.dir, "stateAvg_Qtot.dat.Fixed"))[:, 1] isF = q > 0.4 isU = q <= 0.4 except: try: r = dataIO.readData(os.path.join(options.dir, "stateAvg_RMSD.Fixed.dat"))[:, 1] isF = r < 0.4 isU = r > 0.4 except: print "Need either stateAvg_[Qtot,RMSD].Fixed.dat in the directory" exit() sumDegF = int(deg[isF].sum())
parser.add_option('-g',dest='gamma',default=1.,type=float, help="Friction coefficient") parser.add_option('-t',dest='temp',default=100.,type=float,help="Temperature") parser.add_option('--dt',dest='dt',default=0.0005,type=float, help="Timestep") parser.add_option('-n',dest='num_iter',default=100, type=int, help="Number of iterations to perform") parser.add_option('-o',dest='out_FN',help="Output filename") options, args = parser.parse_args() from pyschwancr import BD, dataIO from numpy import * import matplotlib matplotlib.use('pdf') from matplotlib.pyplot import * import random import re V = - 1.38E-23 * options.temp * np.log( 10**( dataIO.readData( options.pot_FN ) ) ) V = V.T # This is needed to swap the axes Nx, Ny = V.shape print V.shape dx = 1. / (Nx - 1.) dy = 1. / (Ny - 1.) imshow(V.T,extent=[0,1,0,1],origin='bottom',cmap='jet_r') # V has x-axis = axis 0, but need to transpose to use imshow correctly def Force( R ): wallForce = 100 # Need to get the indices to look in V: I_x = int( R[0] * ( Nx - 1 ) ) I_y = int( R[1] * ( Ny - 1 ) ) if I_x >= Nx-1:
import os, sys, re def AnalyzeState( state ): print "Working on state %d" % state stateInd = np.where( ass == state ) if not belowCut[ stateInd ].sum() in [ 0, stateInd[0].shape[0] ]: # split the state return True else: return False return False # Read in the data proj = Project.Project.LoadFromHDF( options.proj_FN ) data = dataIO.readData( options.data_FN ) ass = Serializer.LoadData( options.ass_FN ) rmsd = Serializer.LoadData( options.ass_FN + '.RMSD' ) # This could end poorly.... but I can add another parameter if need be... gens = Trajectory.Trajectory.LoadFromLHDF( options.gens_FN ) print "Loaded the data" if os.path.isdir( options.write_dir ): print "Directory exists, will write data to %s... Careful, this could cause options, since overwriting will CRASH this script" % options.write_dir else: os.mkdir( options.write_dir ) print "Made output directory (%s)" % options.write_dir data2d = msmTools.reshapeRawData( data, proj ) maxState = ass.max() belowCut = ( data2d <= options.cutoff )
) parser.add_option("-o", dest="out_FN", default="Fold_Unfold_Times.pdf", help="Output file to write to") options, args = parser.parse_args() from numpy import * from msmbuilder import Project from pyschwancr import dataIO, msmTools import os, sys, re import matplotlib matplotlib.use("pdf") from matplotlib.pyplot import * from scipy import optimize Proj = Project.Project.LoadFromHDF(options.proj_FN) Data = dataIO.readData(options.data_FN) # first reshape the data into trajectories. Lens = Proj["TrajLengths"] Trajs = [] sum = 0 for i in range(len(Lens)): Trajs.append(Data[sum : sum + Lens[i]]) sum += Lens[i] Folds = [] Unfolds = [] for traj in Trajs: (a, b) = msmTools.calcRawFoldTime(traj, options.f_cut, options.u_cut, low_is_folded=options.low_is_folded)
#!/usr/bin/env python
from optparse import OptionParser

parser = OptionParser()
parser.add_option('-f', dest='input_FN', default='./stateAvg_RMSD.Fixed.dat', help='Input stateAvg_RMSD.dat' )
options, args = parser.parse_args()

from msmbuilder import Serializer
from pyschwancr import dataIO
import os, sys, re

# Column 1 is stored as the per-state mean, and column 2 squared as the
# per-state variance.
msmAvg = dataIO.readData( options.input_FN )
stats = {
    'state_mean_rmsd': msmAvg[:,1],
    'state_var_rmsd': msmAvg[:,2] ** 2,
}

s = Serializer.Serializer( stats )
s.SaveToHDF( 'ClusterStats.hdf' )
def main(): # Need to construct the list of trajframes to use. First look at what the values are for the ones already done. OldTrajFrames = [] for tempDir in dirs2check: tempList = [ fn for fn in os.listdir( tempDir ) if re.search( 'trj\d+_frm\d+$', fn ) ] tempTrajFrames = [ re.search( 'trj(\d+)_frm(\d+)$', fn ).groups() for fn in tempList ] tempTrajFrames = [ [ int( a ), int( b ) ] for (a,b) in tempTrajFrames ] OldTrajFrames.extend( tempTrajFrames ) Xdat = dataIO.readData( options.x_dat ) Ydat = dataIO.readData( options.y_dat ) uniqX = unique( Xdat ) uniqY = unique( Ydat ) diffX = abs( uniqX[1:] - uniqX[:-1] ).min() diffY = abs( uniqY[1:] - uniqY[:-1] ).min() Nx = 1. / diffX Ny = 1. / diffY Xdat = Xdat * ( Nx ) Ydat = Ydat * ( Ny ) if len( Xdat.shape ) > 1: Xdat = Xdat[:,0] if len( Ydat.shape ) > 1: Ydat = Ydat[:,0] Xdat2D = msmTools.reshapeRawData( Xdat.astype(int), Proj ) Ydat2D = msmTools.reshapeRawData( Ydat.astype(int), Proj ) x_interval = [ float( i ) for i in options.x_int.split(',') ] y_interval = [ float( i ) for i in options.y_int.split(',') ] x_range = arange( int( x_interval[0] * Nx ), int( x_interval[1] * Nx ) + 1 ) y_range = arange( int( y_interval[0] * Ny ), int( y_interval[1] * Ny ) + 1 ) #print x_range, y_range #print Xdat.max(), Ydat.max() #print OldTrajFrames # Now construct the list of traj frames to use in the analysis TrajFrames = [] print "Finding conformations..." for Xi in x_range: for Yi in y_range: whichTrajFrames = array( where( ( Xdat2D == Xi ) * ( Ydat2D == Yi) ) ).T if len( whichTrajFrames ) <= 4: ToAddTrajFrames = [ list( i ) for i in whichTrajFrames ] else: ToAddTrajFrames = [ list( pair ) for pair in whichTrajFrames if list( pair ) in OldTrajFrames ] # The above list contains pairs for this x,y for which the simulations have already been done. 
if len( ToAddTrajFrames ) > 4: ToAddTrajFrames = [ list( pair ) for pair in random.permutation( ToAddTrajFrames )[:4] ] while len( ToAddTrajFrames ) < 4: randPair = whichTrajFrames[ random.randint( len( whichTrajFrames ) ) ] randPair = list( randPair ) # print '\t', randPair, ToAddTrajFrames if randPair in ToAddTrajFrames: continue else: ToAddTrajFrames.append( randPair ) TrajFrames.extend( ToAddTrajFrames ) # print Xi, Yi, ToAddTrajFrames, [ ( Xdat2D[ tuple(pair) ], Ydat2D[ tuple(pair) ] ) for pair in ToAddTrajFrames ] print TrajFrames print "Running the simulations..." run( TrajFrames )
import matplotlib from matplotlib.backends.backend_pdf import PdfPages from matplotlib.pyplot import * from pyschwancr import dataIO, msmTools import os, sys, re Ass = Serializer.LoadData( args.ass_FN ) Proj = Serializer.LoadFromHDF( args.proj_FN ) if (Ass.max()+1) > 100: print "You have %d states... This is going to be a large pdf file..." % (Ass.max()+1) pp = PdfPages( args.out_FN ) Data = dataIO.readData( args.data_FN ) if len(Data.shape) == 1: Data = msmTools.reshapeRawData( Data, Proj ) x0 = 0 x1 = int(Data.max()+1) if args.x_lbl != None: x_lbl = ' '.join( args.x_lbl ) else: x_lbl = 'Data in State' for i in range( Ass.max() + 1 ): figure()
from pyschwancr import dataIO, msmTools import os, sys, re from scipy.optimize import curve_fit import matplotlib.pyplot as plt import numpy as np import warnings warnings.filterwarnings('ignore','Warning: overflow encountered in exp') def f( x , a, b ): return np.exp( - b * x ) print "Loading data" proj = Serializer.Serializer.LoadFromHDF( options.proj_FN ) data = dataIO.readData( options.raw_FN ) data2d = msmTools.reshapeRawData( data, proj ) print "Calculating autocorrelations" Autos = [ autocorrelate.fft_autocorrelate( trj[ np.where( trj != -1 ) ] ) for trj in data2d ] print "Fitting the data to single exponentials" Fits = [ curve_fit( f, np.arange( len( corr ) ), corr )[0] for corr in Autos ] outName = '.'.join( options.out_FN.split('.')[:-1] ) Fits = np.array( Fits ) np.savetxt( outName + '.dat', Fits ) print "Plotting some fits"
if len( options.data_FNs ) != len( options.coefs ): print "Need to enter a coefficient for each filename! There are %d coefficients for %d filenames listed!" % ( len( options.coefs ), len( options.data_FNs ) ) exit() Total = [] nameList =[ 'CombinedData' ] for i in range( len( options.data_FNs ) ): fn = options.data_FNs[i] C = options.coefs[i] if C % 1: # Nonzero return from mod 1 means this is not an integer, so write as a float nameList.append('%.1e'%C) else: # It's an integer! nameList.append('%d'%int(C)) nameList.append(fn) dat = dataIO.readData( fn ) if len( dat.shape ) > 1: print "Using first column of data... re-shape the data if this doesn't work." dat = dat[:,0] if len( dat.shape ) == 0: dat = np.array([ dat ]) Total.append( C * dat ) Total = np.array( Total ).sum(axis=0) name = dataIO.writeData( nameList, Total, txt=False ) print "Wrote the combination to %s" % name
parser.add_option('--sr',dest='Rnat_state',help='PDB with the native state in it')
parser.add_option('--cr',dest='coef_rmsd',default=1,type=float,help='Coefficient for RMSD in the linear combination')
parser.add_option('--cq',dest='coef_qnorm',default=1,type=float,help='Coefficient for Q-Norm in the linear combination')
options, args = parser.parse_args()

from numpy import *
from msmbuilder import Serializer, DistanceMetric, Trajectory, Conformation
from pyschwancr import dataIO
import os, sys, re

# First load the trajectories
Ass = Serializer.LoadData( options.ass_FN ).astype(int)
Ass1d = Ass[ where( Ass >= 0 ) ].flatten()   # only the non-negative assignments, flattened

if options.coef_qnorm == 0:
    # Q-norm has zero weight: a one-column array of ones (one row per state) stands in
    QGens = ones( ( Ass.max()+1, 1 ) ).astype(uint8)
else:
    QGens = dataIO.readData( options.Qgen_FN ).astype(uint8)

RGens = Trajectory.Trajectory.LoadFromLHDF( options.Rgen_FN )

# Distance metric: linear combination of rmsd and qnorm with the user's coefficients
metrics = [ 'rmsd', 'qnorm' ]
coefficients = [ options.coef_rmsd, options.coef_qnorm ]
DistLC = DistanceMetric.LinearCombination( metrics, coefficients )

NatStateXYZ = Conformation.Conformation.LoadFromPDB( options.Rnat_state )
if options.Qnat_state:
    NatStateQData = dataIO.readData( options.Qnat_state )
parser.add_option('-c',dest='cut',type=float,help='Cutoff to use to color states') parser.add_option('-d',dest='data_FN',help='Data to use to define folded and unfolded states') parser.add_option('--low-is-folded',dest='low_is_folded',default=False,action='store_true',help='Pass this flag if a low value of your order parameter indicates a folded state (e.g. RMSD)') parser.add_option('-w',dest='write_dir',default='./',help='Directory to save output to [ ./ ]') options, args = parser.parse_args() from numpy import * import matplotlib matplotlib.use('agg') from matplotlib.pyplot import * from pyschwancr import dataIO, msmTools import os, sys, re from scipy.io import mmread T = mmread( options.tProb ) pops = dataIO.readData( options.pops ) data = dataIO.readData( options.data_FN )[:,1] cut = options.cut print "Loaded Data." if options.low_is_folded: isF = data < cut else: isF = data > cut degs = msmTools.getDegrees( T ) np.savetxt(os.path.join( options.write_dir,'Degrees.dat' ), degs) Gs = - log( pops ) Gs = Gs - Gs.max() plot( degs[ where( 1 - isF ) ], Gs[ where( 1 - isF ) ], 'rs', label='Unfolded States' )
from pyschwancr import dataIO # First load map and check if anything was even trimmed... M = loadtxt(options.mapFN) if where(M==-1)[0].shape == 0: print "No -1 entries in %s ... This probably means nothing was trimmed. Exiting..."%options.mapFN if options.outFN: outFN = options.outFN elif options.inFN.split('.')[-1] in [ 'txt','dat' ]: # Then we have an extension so output name should use the first part outFN = '.'.join( options.inFN.split('.')[:-1] ) + '.Fixed.dat' else: outFN = options.inFN + '.Fixed.dat' data = dataIO.readData( options.inFN ) outList = [] for index, line in enumerate(data): if M[index] >= 0: outList.append( line ) outList = array( outList ) if outList.dtype in [ complex, complex64, complex128 ]: save( outFN[:-4]+'.npy', outList ) else: savetxt( outFN, array(outList) )
options, args = parser.parse_args() import numpy as np from pyschwancr import dataIO from scipy import optimize def f(p, x): return p[0] * x + p[1] def err_f(p, x, y): return f(p, x) - y xDat = dataIO.readData(options.x_data) yDat = dataIO.readData(options.y_data) if len(xDat.shape) > 1: xDat = xDat[:, 0] print "Using first column of x-axis data" if len(yDat.shape) > 1: yDat = yDat[:, 0] print "Using first column of x-axis data" xMax = xDat.max() xMin = xDat.min() # xDat = ( xDat - xMin ) / xMax yMax = yDat.max()