Exemple #1
0
def test_traj_0():
    """Check that load_from_lhdf honors Stride and AtomIndices.

    A reference trajectory is loaded whole and then restricted/strided
    in memory; the result must match a trajectory loaded with the same
    Stride/AtomIndices options applied at read time.
    """
    atom_indices = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)

    # Build the reference by post-processing a full load.
    reference = get('Trajectories/trj0.lh5')
    reference.restrict_atom_indices(atom_indices)
    reference['XYZList'] = reference['XYZList'][::stride]

    # Load again, letting the reader apply the stride/atom selection.
    traj = Trajectory.load_from_lhdf(get('Trajectories/trj0.lh5', just_filename=True),
        Stride=stride, AtomIndices=atom_indices)

    # make sure we loaded the right number of atoms
    assert traj['XYZList'].shape[1] == len(atom_indices)

    for key in traj.keys():
        if key == 'SerializerFilename':
            continue
        if key == 'IndexList':
            # IndexList rows may be ragged, so compare row by row.
            for row, ref_row in zip(traj[key], reference[key]):
                eq(row, ref_row)
        else:
            # XYZList and all remaining keys compare directly.
            eq(traj[key], reference[key])
Exemple #2
0
def get_project_object( traj_directory, conf_filename, out_filename=None ):
    """Build a msmbuilder.Project from a directory of .lh5 trajectories.

    Useful when a conversion script (e.g. ConvertDataToLHDF.py) wrote
    the trajectories but failed to emit the ProjectInfo.yaml file, or
    when combining two projects by copying their trajectories into a
    single folder (though concatenating the ProjectInfo.yaml files by
    hand is usually cheaper).

    Inputs:
    -------
    1) traj_directory : directory to find the trajectories
    2) conf_filename : file to find the conformation
    3) out_filename [ None ] : if None, the project file is not saved;
        otherwise the project is saved to this path before being
        returned

    Outputs:
    -------
    project : msmbuilder.Project object corresponding to your project.
    """
    # Natural sort (trj2 before trj10), then make each path usable from
    # the current working directory.
    filenames = sorted(os.listdir(traj_directory), key=keynat)
    traj_paths = [os.path.join(traj_directory, fn) for fn in filenames]

    # JustInspect=True returns only the XYZList shape; element 0 is the
    # number of frames, i.e. the trajectory length.
    traj_lengths = []
    for traj_filename in traj_paths:
        logger.info(traj_filename)
        n_frames = Trajectory.load_from_lhdf(traj_filename, JustInspect=True)[0]
        traj_lengths.append(n_frames)

    project = Project({'conf_filename': conf_filename,
                       'traj_lengths': traj_lengths,
                       'traj_paths': traj_paths,
                       'traj_errors': [None] * len(traj_paths),
                       'traj_converted_from': [[None]] * len(traj_paths)})

    if out_filename is not None:
        project.save(out_filename)
        logger.info('Saved project file to %s', out_filename)
    return project
Exemple #3
0
def test_traj_0():
    """Stride/AtomIndices applied at load time must equal post-hoc filtering."""
    atom_indices = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)

    source = os.path.join(fixtures_dir(), 'trj0.lh5')

    # Reference: full load, then restrict atoms and stride frames in memory.
    reference = Trajectory.load_from_lhdf(source, Stride=1)
    reference.restrict_atom_indices(atom_indices)
    reference['XYZList'] = reference['XYZList'][::stride]

    # Same trajectory, but with the filtering done by the reader itself.
    loaded = Trajectory.load_from_lhdf(source, Stride=stride, AtomIndices=atom_indices)

    for key in loaded.keys():
        if key == 'SerializerFilename':
            continue
        if key == 'IndexList':
            # Rows may be ragged: compare one at a time.
            for row, ref_row in zip(loaded[key], reference[key]):
                npt.assert_array_equal(row, ref_row)
        elif key == 'XYZList':
            # Coordinates only match to floating-point precision.
            npt.assert_array_almost_equal(loaded[key], reference[key])
        else:
            npt.assert_array_equal(loaded[key], reference[key])
def main(genfile):
    """Measure atom-pair distances and backbone dihedrals along a
    trajectory and relate them to the per-frame RMSD profile.

    Parameters
    ----------
    genfile : path to a .lh5 trajectory. Companion inputs are read from
        disk: <base>.rmsd.dat, ./atompairs.dat, ./atompairs-map.txt,
        omega_indices.txt, phi_indices.txt, conformation_pairs.dat.

    Side effects: shows pylab scatter plots and writes one .dat output
    per metric next to the trajectory.
    """
    base = genfile.split('.lh5')[0]
    traj = Trajectory.load_from_lhdf(genfile)
    rmsd = numpy.loadtxt('%s.rmsd.dat' % base)
    atom_pairs = numpy.loadtxt('./atompairs.dat', dtype=int)
    names = numpy.loadtxt('./atompairs-map.txt', usecols=(2,), dtype=str)
    for (ind, name) in zip(atom_pairs, names):
        # One 2-atom pair per metric; use an int dtype so the indices
        # stay integral for AtomPairs (matches the loop further below,
        # which already used dtype=int).
        index1 = numpy.zeros((1, 2), dtype=int)
        index1[0] = ind
        pairmetric = metrics.AtomPairs(metric='euclidean', p=1, atom_pairs=index1)
        distances = pairmetric.prepare_trajectory(traj)
        pylab.figure()
        pylab.scatter(rmsd, distances, label=name)
        pylab.legend()
        pylab.show()
        numpy.savetxt('%s_%spair.dat' % (base, name), distances)
    # _dihedralcalc takes one row of 4 atom indices per dihedral angle.
    index1 = numpy.zeros((1, 4))
    index1[0] = numpy.loadtxt('omega_indices.txt', dtype=int, ndmin=1)
    index2 = numpy.zeros((1, 4))
    index2[0] = numpy.loadtxt('phi_indices.txt', dtype=int, ndmin=1)
    for (ind, name) in zip([index1, index2], ['omega', 'phi']):
        dihed = _dihedralcalc.compute_dihedrals(traj['XYZList'], ind, degrees=True)
        dihed = [frame[0] for frame in dihed]
        numpy.savetxt('%s_%s.dat' % (base, name), dihed)
    atom_pairs = numpy.loadtxt('conformation_pairs.dat', dtype=int)
    for (pair, name) in zip(atom_pairs, ['oxos_dist', 'hydrophob_dist']):
        index = numpy.zeros((1, 2), dtype=int)
        index[0] = pair
        metric = metrics.AtomPairs(metric='euclidean', p=1, atom_pairs=index)
        distances = metric.prepare_trajectory(traj)
        # *10 converts the distances' units (presumably nm -> Angstrom;
        # confirm against the trajectory's units).
        distances = [frame[0] * 10 for frame in distances]
        pylab.figure()
        pylab.scatter(rmsd, distances, label=name)
        pylab.legend()
        pylab.show()
        # BUG FIX: the original wrote '%s_pairs.dat' for BOTH names, so
        # the second iteration clobbered the first; include the metric
        # name in the filename, as the earlier pair loop does.
        numpy.savetxt('%s_%spair.dat' % (base, name), distances)
def run( project, output, num_procs=1, chunk_size=50000, traj_fn='all' ):
    """Compute per-frame DSSP assignments over one or all trajectories.

    Parameters
    ----------
    project : msmbuilder Project whose trajectories are processed when
        traj_fn == 'all'
    output : filename handed to np.save for the assignment array
    num_procs : number of worker processes in the multiprocessing pool
    chunk_size : frames loaded per chunk, to bound memory use
    traj_fn : 'all' to process every trajectory in the project, or the
        path of a single .lh5 trajectory

    Side effects: writes the assignment array to `output` and closes
    the module-level DEVNULL handle.
    """

    pool = mp.Pool( num_procs )

    dssp_assignments = []

    if traj_fn.lower() == 'all':

        for i in xrange( project.n_trajs ):
            traj_dssp_assignments = []
            # N is the total frame count, used only for progress output.
            N = project.traj_lengths[i]
            j = 0
            # Stream the trajectory chunk-by-chunk so that at most
            # chunk_size frames are resident at once.
            for trj_chunk in Trajectory.enum_chunks_from_lhdf( project.traj_filename( i ), ChunkSize=chunk_size ):
                # Fan this chunk's frames out to the worker pool and
                # block until all results are back (order preserved).
                result = pool.map_async( analyze_conf, trj_chunk['XYZList'] )
                result.wait()

                traj_dssp_assignments.extend( result.get() )

                j+=len(trj_chunk)
                print "Trajectory %d: %d / %d" % (i, j, N)
            dssp_assignments.append( traj_dssp_assignments )
    
    else:
        # Single-trajectory mode: same chunked pipeline for one file.
        traj_dssp_assignments = []
        # JustInspect=True returns only the XYZList shape; [0] = n_frames.
        N = Trajectory.load_from_lhdf(traj_fn, JustInspect=True)[0]
        j = 0
        for trj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, ChunkSize=chunk_size):
            result = pool.map_async(analyze_conf, trj_chunk['XYZList'])
            result.wait()

            traj_dssp_assignments.extend(result.get())

            j+=len(trj_chunk)
            print "Trajectory %s: %d / %d" % (traj_fn, j, N)
        dssp_assignments.append(traj_dssp_assignments)

    # NOTE(review): with traj_fn='all', rows are ragged if trajectory
    # lengths differ, producing an object array -- confirm downstream use.
    dssp_assignments = np.array( dssp_assignments )
    np.save( output, dssp_assignments )
    DEVNULL.close()
Exemple #6
0
def main(coarse_val, orig_val, rcut):
    """Merge a fine-grained and a coarse-grained MSM state decomposition.

    Frames whose original state's generator RMSD exceeds `rcut`
    ("unbound") are remapped onto the coarse clustering (offset by
    10000 to avoid ID collisions with original states); bound frames
    keep their original state. States are then renumbered contiguously,
    the merged assignments are saved, and a combined generator
    trajectory plus RMSD listing is written out.

    Parameters
    ----------
    coarse_val, orig_val : select the ./d<val> data directories
    rcut : bound/unbound RMSD cutoff (converted with float(), so it may
        arrive as a string)
    """
    data=dict()
    data['coarse']=dict()
    data['orig']=dict()
    dirs=dict()
    dirs['coarse']='./d%s' % coarse_val
    dirs['orig']='./d%s' % orig_val
    proj=Project.load_from('ProjectInfo.yaml')
    # Load assignments, assignment distances, generator RMSDs and the
    # generator trajectories for both clusterings.
    types=['ass', 'rmsd', 'dist', 'gens']
    for key in ['coarse', 'orig']:
        for type in types:
            if 'ass' in type:
                ass=io.loadh('%s/Data/Assignments.h5' % dirs[key])
                data[key][type]=ass['arr_0']
            elif 'dist' in type:
                ass=io.loadh('%s/Data/Assignments.h5.distances' % dirs[key])
                data[key][type]=ass['arr_0']
            elif 'rmsd' in type:
                rmsd=numpy.loadtxt('%s/Gens.rmsd.dat' % dirs[key])
                data[key][type]=rmsd
            elif 'gens' in type:
                gens=Trajectory.load_from_lhdf('%s/Gens.lh5' % dirs[key])
                data[key][type]=gens
    unboundmap=dict()
    boundmap=dict()
    #unboundstates=dict()
    #unboundrmsd=dict()
    # build map dict for orig to coarse unbound states, bound will stay same
    newass=-1*numpy.ones(( data['orig']['ass'].shape[0], data['orig']['ass'].shape[1]), dtype=int)
    for j in range(0, data['orig']['ass'].shape[0]):
        for (n,i) in enumerate(data['orig']['ass'][j]):
            # if unbound
            if i != -1:
                # Unbound frame (generator RMSD above cutoff): take the
                # coarse state, offset by 10000 so coarse IDs cannot
                # collide with original state IDs.
                if data['orig']['rmsd'][i] > float(rcut):
                    state=data['coarse']['ass'][j][n]
                    newass[j][n]=state+10000
                else:
                    newass[j][n]=i
    count=0
    # Renumber merged states contiguously, recording for each new index
    # whether it came from the original (bound) or coarse (unbound) set.
    # NOTE(review): the -1 "unassigned" marker is also in `unique` and
    # gets renumbered like a real state -- confirm this is intended.
    unique=set(newass.flatten())
    boundmap=dict()
    unboundmap=dict()
    for x in unique:
        locations=numpy.where(newass==x)
        newass[locations]=count
        if x >= 10000:
            unboundmap[count]=(x-10000)
        else:
            boundmap[count]=x
        count+=1
    io.saveh('%s/Coarsed_r%s_Assignments.h5' % (dirs['orig'], rcut), newass)
    subdir='%s/Coarsed_r%s_gen/' % (dirs['orig'], rcut)
    if not os.path.exists(subdir):
        os.mkdir(subdir)
    ohandle=open('%s/Coarsed%s_r%s_Gens.rmsd.dat' % (subdir, coarse_val, rcut), 'w')
    # Assemble one generator frame per merged state (bound states first,
    # then unbound) and record each state's RMSD alongside.
    b=data['orig']['gens']['XYZList'].shape[1]
    c=data['orig']['gens']['XYZList'].shape[2]
    dicts=[boundmap, unboundmap]
    names=['bound', 'unbound']
    labels=['orig', 'coarse']
    total=len(boundmap.keys()) + len(unboundmap.keys())
    structure=proj.empty_traj()
    structure['XYZList']=numpy.zeros((total, b, c), dtype='float32')
    count=0
    for (name, label, mapdata) in zip( names, labels, dicts):
        print "writing coarse gen %s out of %s pdbs" % (count, len(mapdata.keys()))
        for i in sorted(mapdata.keys()):
            macro=mapdata[i]
            structure['XYZList'][count]=data[label]['gens']['XYZList'][macro]
            ohandle.write('%s\t%s\t%s\n' % (name, count, data[label]['rmsd'][macro]))
            print name, count
            count+=1
    structure.save_to_xtc('%s/Coarsed%s_r%s_Gens.xtc' % (subdir, coarse_val, rcut))
Exemple #7
0
def main(modeldir, gensfile, write=False):
    """Summarize the extreme components of the MSM's leading eigenvectors.

    For each eigenvector examined, the five states with the largest (and,
    for i > 0, smallest) components are written to
    <modeldir>/eig-states/eiginfo.txt together with their generator
    indices, RMSD and COM distance; a colored scatter plot of COM
    distance vs. RMSD is saved per eigenvector.

    Parameters
    ----------
    modeldir : directory holding tProb.mtx, Populations.dat, Mapping.dat
        and Assignments.Fixed.h5; ProjectInfo.yaml is looked up in the
        path prefix before 'Data'
    gensfile : .lh5 generator trajectory; <base>.rmsd.dat and
        <base>.vmd_com.dat are read alongside it
    write : when True, also dump representative structures via
        get_structure
    """
    if not os.path.exists('%s/eig-states/' % modeldir):
        os.mkdir('%s/eig-states/' % modeldir)
    ohandle=open('%s/eig-states/eiginfo.txt' % modeldir, 'w')
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    gens=Trajectory.load_from_lhdf(gensfile)
    T=mmread('%s/tProb.mtx' % modeldir)
    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    # NOTE(review): the first COM entry is dropped -- presumably a
    # header/reference row in the vmd output; confirm against the file.
    com=numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    data['com']=com[1:]
    # NOTE(review): `pops` is unused below, and `map` shadows the builtin.
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)

    # Keep RMSD/COM only for states that survived trimming (map != -1),
    # so their indexing matches the trimmed transition matrix.
    map_rmsd=[]
    map_com=[]
    for x in range(0, len(data['rmsd'])):
        if map[x]!=-1:
            map_com.append(data['com'][x])
            map_rmsd.append(data['rmsd'][x])
    
    map_com=numpy.array(map_com)
    map_rmsd=numpy.array(map_rmsd)
    # NOTE(review): T is re-read here even though it was loaded above.
    T=mmread('%s/tProb.mtx' % modeldir)
    eigs_m=msm_analysis.get_eigenvectors(T, 10)

    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive

    print numpy.shape(eigs_m[1][:,1])
    # NOTE(review): range(0, 1) means only eigenvector 0 is processed,
    # so the else branch below is currently dead code.
    for i in range(0,1):
        order=numpy.argsort(eigs_m[1][:,i])
        if i==0:
            # Eigenvector 0: report only the five largest components.
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            ohandle.write('state\tgenstate\tmagnitude\trmsd\tcom\n')
            for n in order[::-1][:5]:
                # numpy.where(map==n)[0] recovers the untrimmed generator index.
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ",  maxes, values
            maxes=numpy.array(maxes)
            if write==True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
        else:
            # Higher eigenvectors: report both the five largest and the
            # five smallest (most negative) components.
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ",  maxes, values
            order=numpy.argsort(eigs_m[1][:,i])
            mins=[]
            gen_mins=[]
            values=[]
            ohandle.write('eig%s mins\n' % i)
            for n in order[:5]:
                gen_mins.append(numpy.where(map==n)[0])
                mins.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "mins at ",  mins, values
            if write==True:
                get_structure(modeldir, i, gen_maxes,  maxes, gens, project, ass, type='max')
                get_structure(modeldir, i, gen_mins,  mins, gens, project, ass, type='min')
        # Scatter of COM distance vs. RMSD, colored and sized by the
        # eigenvector component of each state.
        pylab.scatter(map_com[order], map_rmsd[order], c=eigs_m[1][order,i], cmap=cm, s=1000*abs(eigs_m[1][order,i]), alpha=0.5)
        print map_com[order][numpy.argmax(eigs_m[1][order,i])]
        print eigs_m[1][order,i][1]
        CB=pylab.colorbar()
        # Shrink/reposition the colorbar to fit the axes box.
        l,b,w,h=pylab.gca().get_position().bounds
        ll, bb, ww, hh=CB.ax.get_position().bounds
        CB.ax.set_position([ll, b+0.1*h, ww, h*0.8])
        CB.set_label('Eig%s Magnitudes' % i)
        ylabel=pylab.ylabel('Ligand RMSD to Xtal ($\AA$)')
        xlabel=pylab.xlabel(r'P Active Site - L COM Distance ($\AA$)')
        pylab.legend(loc=8, frameon=False)
        pylab.savefig('%s/2deigs%i_com_prmsd.png' %(modeldir, i),dpi=300)
Exemple #8
0
usage = """Usage:   python Cluster_Variance.py input_name num1  
    Try:  python Cluster_Variance.py rgdf_nmev 100
"""
# BUG FIX: the script consumes two positional arguments, so argv must
# hold at least 3 entries (script name + 2 args). The old `< 2` check
# let `sys.argv[2]` below raise IndexError instead of printing usage.
if len(sys.argv) < 3:
    print(usage)  # parenthesized single-arg form behaves identically on Python 2
    sys.exit(1)

input_name = sys.argv[1]
num1 = int(sys.argv[2])

input_numbers = [num1]

if __name__ == "__main__":

    # Load the first trajectory of the project.
    traj = Trajectory.load_from_lhdf('Trajectories/trj0.lh5')

    #Load in all the pair data and add to dictionary#
def read_NOE_data(filename):
    """Extract NOE restraint columns from `filename` via awk scratch files.

    Whitespace-separated fields 3, 4 and 8 of every line are dumped to
    atom1.txt / atom2.txt / pair_number.txt in the working directory and
    read back with numpy.

    NOTE(review): `filename` is interpolated directly into a shell
    command -- a shell-injection risk if it can come from untrusted input.
    """
    # (scratch-file stem, 1-based awk field number)
    for stem, field in (('atom1', 3), ('atom2', 4), ('pair_number', 8)):
        os.system('''cat %s  | awk '{FS=" "}{print $%s}' > %s.txt''' % (filename, field, stem))

    peptide_data = {}
    peptide_data['atom1'] = np.loadtxt('atom1.txt')
    peptide_data['atom2'] = np.loadtxt('atom2.txt')
    # Make sure that the pair_number tags start at 0
    peptide_data['pair_number'] = np.loadtxt('pair_number.txt') - 1

    return peptide_data
Exemple #9
0
def analyzeTraj( trjFN ):
	"""Load the .lh5 trajectory at `trjFN` and return rg.calculate_rg of its XYZList (per-frame radius of gyration, presumably -- confirm against rg)."""

	print "Working on trajectory %s" % trjFN
	trj = Trajectory.load_from_lhdf(trjFN)

	return rg.calculate_rg(trj['XYZList'])
def main(dir):
    """Convert the generator trajectory in `dir` from .lh5 to .xtc format."""
    generators = Trajectory.load_from_lhdf('%s/Gens.lh5' % dir)
    generators.save_to_xtc('%s/Gens.xtc' % dir)
Exemple #11
0
def main(coarse_val, orig_val, rcut):
    """Merge fine- and coarse-grained MSM assignments using per-frame RMSD.

    Unlike the per-state variant, each frame is classified bound/unbound
    by its own lprmsd value (Trajectories-metric/trj<j>_lprmsd.dat).
    Unbound frames whose coarse state is itself bound keep the original
    state; otherwise they move to the coarse state. States are renumbered
    contiguously via remap_ass, merged assignments/distances are saved,
    and a combined generator trajectory plus RMSD listing is written.

    Parameters
    ----------
    coarse_val, orig_val : select the ./d<val> data directories
    rcut : bound/unbound RMSD cutoff (converted with float(), so it may
        arrive as a string)
    """
    data=dict()
    data['coarse']=dict()
    data['orig']=dict()
    dirs=dict()
    dirs['coarse']='./d%s' % coarse_val
    dirs['orig']='./d%s' % orig_val
    proj=Project.load_from('ProjectInfo.yaml')
    # Load assignments, assignment distances, generator RMSDs and the
    # generator trajectories for both clusterings.
    types=['ass', 'rmsd', 'dist', 'gens']
    for key in ['coarse', 'orig']:
        for type in types:
            if 'ass' in type:
                ass=io.loadh('%s/Data/Assignments.h5' % dirs[key])
                data[key][type]=ass['arr_0']
            elif 'dist' in type:
                ass=io.loadh('%s/Data/Assignments.h5.distances' % dirs[key])
                data[key][type]=ass['arr_0']
            elif 'rmsd' in type:
                rmsd=numpy.loadtxt('%s/Gens.rmsd.dat' % dirs[key])
                data[key][type]=rmsd
            elif 'gens' in type:
                gens=Trajectory.load_from_lhdf('%s/Gens.lh5' % dirs[key])
                data[key][type]=gens
    unboundmap=dict()
    boundmap=dict()
    # build map dict for orig to coarse unbound states, bound will stay same
    # -1 marks frames that are unassigned in a given array.
    unboundass=-1*numpy.ones(( data['orig']['ass'].shape[0], data['orig']['ass'].shape[1]), dtype=int)
    newass=-1*numpy.ones(( data['orig']['ass'].shape[0], data['orig']['ass'].shape[1]), dtype=int)
    newdist=-1*numpy.ones(( data['orig']['ass'].shape[0], data['orig']['ass'].shape[1]))
    for j in range(0, data['orig']['ass'].shape[0]):
        # Per-frame lprmsd for trajectory j; one value per assigned frame.
        rmsd=numpy.loadtxt('Trajectories-metric/trj%s_lprmsd.dat' % j)
        frames=numpy.where(data['orig']['ass'][j]!=-1)[0]
        # Sanity check: drop into the debugger if the metric file does
        # not line up with the assigned frames.
        if len(rmsd)!=len(frames):
            print "trajectory mismatch"
            import pdb
            pdb.set_trace()
        for (n,i) in enumerate(data['orig']['ass'][j]):
            # if unbound
            if i != -1:
                #if data['orig']['rmsd'][i] > float(rcut):
                # NOTE(review): rmsd is indexed by frame position n; this
                # assumes assigned frames are a contiguous prefix -- confirm.
                if rmsd[n] > float(rcut):
                    newstate=data['coarse']['ass'][j][n]
                    # If the coarse state is itself bound, keep the
                    # original fine-grained state instead of coarsening.
                    if data['coarse']['rmsd'][newstate] < float(rcut):
                        newass[j][n]=i
                        newdist[j][n]=data['orig']['dist'][j][n]
                    else:
                        unboundass[j][n]=newstate
                        newdist[j][n]=data['coarse']['dist'][j][n]
                else:
                    newass[j][n]=i
                    newdist[j][n]=data['orig']['dist'][j][n]
    count=0
    # Renumber bound states first, then unbound states, continuing the
    # same counter so IDs stay contiguous across both passes.
    # NOTE(review): the first call passes newass as both source and
    # target; confirm remap_ass handles in-place remapping as intended.
    unique=sorted(set(newass.flatten()))
    newass, boundmap, count=remap_ass(newass, newass, unique, count)
    unique=sorted(set(unboundass.flatten()))
    newass, unboundmap, count=remap_ass(unboundass, newass, unique, count)
    io.saveh('%s/Coarsed_r%s_d%s_Assignments.h5' % (dirs['orig'], rcut, coarse_val), newass)
    io.saveh('%s/Coarsed_r%s_d%s_Assignments.distances.h5' % (dirs['orig'], rcut, coarse_val), newdist)
    subdir='%s/Coarsed_r%s_gen/' % (dirs['orig'], rcut)
    if not os.path.exists(subdir):
        os.mkdir(subdir)
    ohandle=open('%s/Coarsed%s_r%s_Gens.rmsd.dat' % (subdir, coarse_val, rcut), 'w')
    # Assemble one generator frame per merged state (bound states from
    # the original clustering, unbound from the coarse one) and record
    # each state's RMSD alongside.
    b=data['orig']['gens']['XYZList'].shape[1]
    c=data['orig']['gens']['XYZList'].shape[2]
    dicts=[boundmap, unboundmap]
    names=['bound', 'unbound']
    labels=['orig', 'coarse']
    total=len(boundmap.keys()) + len(unboundmap.keys())
    structure=proj.empty_traj()
    structure['XYZList']=numpy.zeros((total, b, c), dtype='float32')
    count=0
    for (name, label, mapdata) in zip( names, labels, dicts):
        print "writing coarse gen %s out of %s pdbs" % (count, len(mapdata.keys()))
        for i in sorted(mapdata.keys()):
            macro=mapdata[i]
            structure['XYZList'][count]=data[label]['gens']['XYZList'][macro]
            ohandle.write('%s\t%s\t%s\n' % (name, count, data[label]['rmsd'][macro]))
            print name, count
            count+=1
    # Remove any stale output before writing, since save_to_xtc may
    # refuse to overwrite an existing file.
    otraj='%s/Coarsed%s_r%s_Gens.xtc' % (subdir, coarse_val, rcut)
    if os.path.exists(otraj):
        os.remove(otraj)
    structure.save_to_xtc('%s/Coarsed%s_r%s_Gens.xtc' % (subdir, coarse_val, rcut))