Example #1
def main():
    global data2d
    global As
    # First I need to turn the assignments matrix into a 1D list of assignments
    sys.stdout = os.fdopen(sys.stdout.fileno(),'w',0)
    print "Reading in Assignments... from %s " % options.assFN
    As = io.loadh(options.assFN)['arr_0'].astype(int)
    print "Reading in data... from %s " % options.dataFN
    try:
        f = io.loadh(options.dataFN)
        try:
            data2d = f['arr_0']
        except KeyError:
            data2d = f['Data']
    except Exception:
        # not a loadable HDF5 file: fall back to the raw data and
        # reshape it to match the project's trajectory layout
        data = load(options.dataFN)
        proj = Project.load_from(options.projFN)
        data2d = msmTools.reshapeRawData(data, proj)

    print "Calculating averages for:"

    pool = mp.Pool(options.procs)
    clusters = range( As.max() + 1)
    result = pool.map_async(calcAvg,clusters[:])
    result.wait()
    sol = result.get()
    sol = array(sol)
    savetxt(options.outFN, sol)

    return
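The worker calcAvg handed to pool.map_async above is not shown in this example; a minimal sketch of what it might look like, given the As and data2d globals set up in main (the reduction to a scalar mean is an assumption):

import numpy as np

def calcAvg(cluster):
    # Hypothetical worker: average the observable over every frame
    # assigned to `cluster`; assumes As and data2d have the same shape.
    mask = (As == cluster)
    return np.mean(data2d[mask])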
Example #2
def main(modeldir, genfile, type):
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    data=dict()
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd']=data['rmsd'][frames]
    com=numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'])
    data['com']=data['com'][frames]

    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)
    paths=io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))
    
    for p in range(0, 20):
        movie=project.empty_traj()
        path=paths['Paths'][p]
        flux=paths['fluxes'][p]/paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        frames=numpy.where(path!=-1)[0]
        path=numpy.array(path[frames], dtype=int)
        for (n, state) in enumerate(path):
            t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 20)
            if n==0:
                movie['XYZList']=t[0]['XYZList']
            else:
                movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
        movie.save_to_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p))
Example #3
def run(MinLagtime, MaxLagtime, Interval, NumEigen, AssignmentsFn, symmetrize,
        nProc, output):

    arglib.die_if_path_exists(output)

    # Setup some model parameters
    try:
        Assignments = io.loadh(AssignmentsFn, 'arr_0')
    except KeyError:
        Assignments = io.loadh(AssignmentsFn, 'Data')

    NumStates = max(Assignments.flatten()) + 1
    if NumStates <= NumEigen - 1:
        NumEigen = NumStates - 2
        logger.warning(
            "Number of requested eigenvalues exceeds the rank of the transition matrix! Defaulting to the maximum possible number of eigenvalues."
        )
    del Assignments

    logger.info("Getting %d eigenvalues (timescales) for each lagtime...",
                NumEigen)
    lagTimes = range(MinLagtime, MaxLagtime + 1, Interval)
    logger.info("Building MSMs at the following lag times: %s", lagTimes)

    # Get the implied timescales (eigenvalues)
    impTimes = msm_analysis.get_implied_timescales(AssignmentsFn,
                                                   lagTimes,
                                                   n_implied_times=NumEigen,
                                                   sliding_window=True,
                                                   symmetrize=symmetrize,
                                                   n_procs=nProc)
    numpy.savetxt(output, impTimes)
    return
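A minimal invocation sketch for this run() (all argument values hypothetical; the symmetrize options follow the docstring convention used elsewhere on this page):

# Scan lag times 1..50 in steps of 5, computing 10 implied
# timescales per lag time on 4 processors.
run(MinLagtime=1, MaxLagtime=50, Interval=5, NumEigen=10,
    AssignmentsFn='Data/Assignments.h5', symmetrize='MLE',
    nProc=4, output='ImpliedTimescales.dat')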
Example #4
    def test_2(self):
        try:
            subprocess.Popen('ipcluster start --cluster-id=testclusterid --n=1 --daemonize', shell=True)
            time.sleep(5)
            
            args = self.Args()
            args.output_dir = tempfile.mkdtemp()
            args.cluster_id = 'testclusterid'

            logger = AssignParallel.setup_logger()
            AssignParallel.main(args, self.metric, logger)

            assignments = io.loadh(os.path.join(args.output_dir, 'Assignments.h5'), 'arr_0')
            r_assignments = io.loadh(os.path.join(fixtures_dir(), 'Assignments.h5'), 'Data')
            distances = io.loadh(os.path.join(args.output_dir, 'Assignments.h5.distances'), 'arr_0')
            r_distances = io.loadh(os.path.join(fixtures_dir(), 'Assignments.h5.distances'), 'Data')

            npt.assert_array_equal(assignments, r_assignments)
            npt.assert_array_almost_equal(distances, r_distances)
        
        except:
            raise
        finally:
            shutil.rmtree(args.output_dir)
            subprocess.Popen('ipcluster stop --cluster-id=testclusterid', shell=True).wait()
Example #5
def main():
    """Parse command line inputs, load up files, and build a movie."""

    parser = arglib.ArgumentParser(description="""
Create an MSM movie by sampling a sequence of states and sampling a 
random conformation from each state in the sequence.  
""")
    parser.add_argument('project')
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    parser.add_argument('tprob', default='Data/tProb.mtx')
    parser.add_argument('num_steps')
    parser.add_argument('starting_state', type=int, help='''Which state to start trajectory from.''')
    parser.add_argument('output', default='sample_traj.pdb', help="""The filename of your output trajectory.  The filetype suffix will be used to select the output file format.""")
    args = parser.parse_args()

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
    
    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)
    
    project = Project.load_from(args.project)    
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
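A hypothetical command line for this script (the script name is assumed, and the exact flag syntax depends on how arglib maps the argument names):

# Walk 1000 steps of the chain from state 0 and save one sampled
# conformation per visited state:
#   python SampleMSMTrajectory.py ProjectInfo.yaml Data/Assignments.Fixed.h5 \
#       Data/tProb.mtx 1000 0 sample_traj.pdb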
Example #6
    def test_2(self):
        try:
            subprocess.Popen(
                'ipcluster start --cluster-id=testclusterid --n=1 --daemonize',
                shell=True)
            time.sleep(5)

            args = self.Args()
            args.output_dir = tempfile.mkdtemp()
            args.cluster_id = 'testclusterid'

            logger = AssignParallel.setup_logger()
            AssignParallel.main(args, self.metric, logger)

            assignments = io.loadh(
                os.path.join(args.output_dir, 'Assignments.h5'), 'arr_0')
            r_assignments = io.loadh(
                os.path.join(fixtures_dir(), 'Assignments.h5'), 'Data')
            distances = io.loadh(
                os.path.join(args.output_dir, 'Assignments.h5.distances'),
                'arr_0')
            r_distances = io.loadh(
                os.path.join(fixtures_dir(), 'Assignments.h5.distances'),
                'Data')

            npt.assert_array_equal(assignments, r_assignments)
            npt.assert_array_almost_equal(distances, r_distances)

        except:
            raise
        finally:
            shutil.rmtree(args.output_dir)
            subprocess.Popen('ipcluster stop --cluster-id=testclusterid',
                             shell=True).wait()
Example #7
def run(MinLagtime, MaxLagtime, Interval, NumEigen, AssignmentsFn, symmetrize, nProc, output):

    arglib.die_if_path_exists(output)

    # Setup some model parameters
    try:
        Assignments = io.loadh(AssignmentsFn, "arr_0")
    except KeyError:
        Assignments = io.loadh(AssignmentsFn, "Data")

    NumStates = max(Assignments.flatten()) + 1
    if NumStates <= NumEigen - 1:
        NumEigen = NumStates - 2
        logger.warning(
            "Number of requested eigenvalues exceeds the rank of the transition matrix! Defaulting to the maximum possible number of eigenvalues."
        )
    del Assignments

    logger.info("Getting %d eigenvalues (timescales) for each lagtime...", NumEigen)
    lagTimes = range(MinLagtime, MaxLagtime + 1, Interval)
    logger.info("Building MSMs at the following lag times: %s", lagTimes)

    # Get the implied timescales (eigenvalues)
    impTimes = msm_analysis.get_implied_timescales(
        AssignmentsFn, lagTimes, n_implied_times=NumEigen, sliding_window=True, symmetrize=symmetrize, n_procs=nProc
    )
    numpy.savetxt(output, impTimes)
    return
Example #8
def load(tica_fn, metric):
    """
    load a tICA solution to use in projecting data.

    Parameters:
    -----------
    tica_fn : str
        filename pointing to tICA solutions
    metric : metrics.Vectorized subclass instance
        metric used to prepare trajectories

    """
    # the only variables we need to save are the two matrices
    # and the eigenvectors / values as well as the lag time
    
    logger.warn("NOTE: You can only use the tICA solution, you will "
                "not be able to continue adding data")
    f = io.loadh(tica_fn)

    tica_obj = tICA(f['lag'][0], prep_metric=metric)
    # lag entry is an array... with a single item

    tica_obj.timelag_corr_mat = f['timelag_corr_mat']
    tica_obj.cov_mat = f['cov_mat']

    tica_obj.vals = f['vals']
    tica_obj.vecs = f['vecs']

    tica_obj._sort()

    return tica_obj
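For reference, a file that this load() can read must carry exactly the keys accessed above; a sketch of the matching saveh call (the variable names are placeholders):

import numpy as np
from msmbuilder import io

io.saveh('tica_solution.h5',
         lag=np.array([10]),            # read back as f['lag'][0]
         timelag_corr_mat=timelag_corr_mat,
         cov_mat=cov_mat,
         vals=vals,
         vecs=vecs)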
Example #9
def run(tProb, observable, init_pops=None, num_vecs=10, output='evec_amps.h5'):

    if init_pops is None:
        init_pops = np.ones(tProb.shape[0]).astype(float) / float(tProb.shape[0])

    else:
        init_pops = init_pops.astype(float) 
        init_pops /= init_pops.sum()

    assert (observable.shape[0] == init_pops.shape[0])
    assert (observable.shape[0] == tProb.shape[0])
    
    try:
        # reuse cached eigenvectors if a previous run saved them
        f = io.loadh('eigs%d.h5' % num_vecs)
        vals = f['vals']
        vecsL = f['vecs']
    except (IOError, KeyError):
        vals, vecsL = msm_analysis.get_eigenvectors(tProb, num_vecs + 1, right=False)
        io.saveh('eigs%d.h5' % num_vecs, vals=vals, vecs=vecsL)

    equil = vecsL[:,0] / vecsL[:,0].sum()

    dyn_vecsL = vecsL[:, 1:]
    # normalize the left and right eigenvectors

    dyn_vecsL /= np.sqrt(np.sum(dyn_vecsL * dyn_vecsL / np.reshape(equil, (-1, 1)), axis=0))

    dyn_vecsR = dyn_vecsL / np.reshape(equil, (-1, 1))

    amps = dyn_vecsL.T.dot(observable) * dyn_vecsR.T.dot(init_pops)

    io.saveh(output, evals=vals[1:], amplitudes=amps)
    logger.info("saved output to %s" % output)
Example #10
def main(file):
    ass=io.loadh(file)
    dir=os.path.dirname(file)
    base=os.path.basename(file)
    newdir='%s/subsample' % dir
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    p=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    data=dict()
    totals=dict()
    iterations=int(ass['arr_0'].shape[1]/10.0)
    start=max(p.traj_lengths)
    for iter in range(0, iterations):
        new=start-10
        if new < 10:
            break
        totals[new]=0
        data[new]=-numpy.ones((ass['arr_0'].shape[0], new), dtype=int)
        for i in range(0, ass['arr_0'].shape[0]):
            data[new][i]=ass['arr_0'][i][:new]
            frames=numpy.where(data[new][i]!=-1)[0]
            totals[new]+=len(frames)
        start=new

    ohandle=open('%s/times.h5' % (newdir), 'w')
    for key in sorted(data.keys()):
        print data[key].shape
        print "total time is %s" % totals[key]
        ohandle.write('%s\t%s\t%s\n' % (data[key].shape[0], data[key].shape[1], totals[key]))
Example #11
    def load_from(cls, filename):
        """
        Load project from disk

        Parameters
        ----------
        filename : string
            filename_or_file can be a path to a legacy .h5 or current
            .yaml file.

        Returns
        -------
        project : the loaded project object

        """

        rootdir = os.path.abspath(os.path.dirname(filename))

        if filename.endswith('.yaml'):
            with open(filename) as f:
                ondisk = yaml.load(f)
                records = {
                    'conf_filename': ondisk['conf_filename'],
                    'traj_lengths': [],
                    'traj_paths': [],
                    'traj_converted_from': [],
                    'traj_errors': []
                }

                for trj in ondisk['trajs']:
                    records['traj_lengths'].append(trj['length'])
                    records['traj_paths'].append(trj['path'])
                    records['traj_errors'].append(trj['errors'])
                    records['traj_converted_from'].append(
                        trj['converted_from'])

        elif filename.endswith('.h5'):
            ondisk = io.loadh(filename, deferred=False)
            n_trajs = len(ondisk['TrajLengths'])
            records = {
                'conf_filename': str(ondisk['ConfFilename'][0]),
                'traj_lengths': ondisk['TrajLengths'],
                'traj_paths': [],
                'traj_converted_from': [None] * n_trajs,
                'traj_errors': [None] * n_trajs
            }

            for i in xrange(n_trajs):
                # this is the convention used in the hdf project format to get the traj paths
                path = os.path.join(
                    ondisk['TrajFilePath'][0], ondisk['TrajFileBaseName'][0] +
                    str(i) + ondisk['TrajFileType'][0])
                records['traj_paths'].append(path)

        else:
            raise ValueError('Sorry, I can only open files in .yaml'
                             ' or .h5 format: %s' % filename)

        return cls(records, validate=True, project_dir=rootdir)
Example #12
def main(assfile, lag, nproc):
    lag=int(lag)
    nproc=int(nproc)
    Assignments=io.loadh(assfile)
    num=int(assfile.split('Assignments_sub')[1].split('.h5')[0])
    dir=os.path.dirname(assfile)
    newdir='%s/boot-sub%s' % (dir, num)
    ref_sub=numpy.loadtxt('%s/times.h5' % dir, usecols=(1,))
    ref_total=numpy.loadtxt('%s/times.h5' % dir, usecols=(2,))
    times=dict()
    for (i,j) in zip(ref_sub, ref_total):
        times[i]=j

    proj=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom=int(times[num])
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments=Assignments['Data']
    else:
        Assignments=Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag), sliding_window=True)
    Counts=Counts.todense()
    Counts=Counts*(1.0/lag)
    T=numpy.array(Counts)
    frames=numpy.where(T==0)
    T[frames]=1
    Popsample=dict()
    iteration=0
    total_iteration=100/nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain=100 % nproc
    else:
        remain=False
    print "iterating thru tCount samples"
    count=0
    while iteration < 100:
        if count*nproc > 100:
            nproc=remain
        print "sampling iteration %s" % iteration
        Tfresh=T.copy()
        input = zip([Tfresh]*nproc, [multinom]*nproc, range(0, NumStates))
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        pool.terminate()
        for c_matrix in all:
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), c_matrix)
            #rev_counts, t_matrix, Populations, Mapping=x
            #scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
            #numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
            #numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration+=1
        count+=1
        print "dont with iteration %s" % iteration*nproc
Example #13
def main(modeldir, genfile, type, write=False):
    proj=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd']=data['rmsd'][frames]
    com=numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'][frames])

    residues=['F36', 'H87', 'I56', 'I90', 'W59', 'Y82', 'hydrophob_dist', 'oxos_dist']
    loops=['loop1', 'loop2', 'loop3']
    for loop in loops:
        data[loop]=numpy.loadtxt('%s.%srmsd.dat' % (genfile.split('.lh5')[0], loop))
        data[loop]=data[loop][frames]
    for res in residues:
        file='%s_%spair.dat' % (genfile.split('.lh5')[0], res)
        if os.path.exists(file):
            data[res]=numpy.loadtxt(file)
            data[res]=data[res][frames]
    angles=['phi', 'omega']
    for ang in angles:
        file='%s_%s.dat' % (genfile.split('.lh5')[0], ang)
        if os.path.exists(file):
            data[ang]=numpy.loadtxt(file)
            data[ang]=data[ang][frames]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound=numpy.loadtxt('%s/tpt-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)

    Tdense=T.todense()
    Tdata=dict()
    for i in unbound:
        for j in unbound:
            if Tdense[i,j]!=0:
                if i not in Tdata.keys():
                    Tdata[i]=[]
                Tdata[i].append(j)
    #print Tdata
    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive
    Q=tpt.calculate_committors(unbound, bound, T)
    ohandle=open('%s/commitor_states.txt' % modeldir, 'w')
    for i in range(0,len(Q)):
        if Q[i]>0.40 and Q[i]<0.6:
            ohandle.write('%s\n' % i)
            #t=project.get_random_confs_from_states(ass['arr_0'], [int(i),], 20)
            #t[0].save_to_xtc('%s/commottor_state%s.xtc' % (modeldir, i))
    if write==True:
        for op in sorted(data.keys()):
            pylab.figure()
            pylab.scatter(data['com'], data[op],  c=Q, cmap=cm, alpha=0.7, s=[map_size(i) for i in Q])
            pylab.xlabel('L RMSD')
            pylab.ylabel(op)
            pylab.colorbar()
        pylab.show()
Example #14
def main(modeldir, genfile, type, write=False):
    data=dict()
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]

    unbound=numpy.loadtxt('%s/tpt-rmsd-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound=numpy.loadtxt('%s/tpt-rmsd-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)

    dir=modeldir.split('Data')[0]
    name=glob.glob('%s/fkbp*xtal*pdb' % dir)
    pdb=Trajectory.load_from_pdb(name[0])
    paths=io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))

    committors=numpy.loadtxt('%s/commitor_states.txt' % modeldir, dtype=int)
    colors=['red', 'orange', 'green', 'cyan', 'blue', 'purple']
    colors=colors*40
    if type=='strict':
        ref=5
    elif type=='super-strict':
        ref=3
    elif type=='medium':
        ref=10
    elif type=='loose':
        ref=15
    #for p in range(0, 3):
    for p in range(0, 1):
        path=paths['Paths'][p]
        print "Bottleneck", paths['Bottlenecks'][p]
        flux=paths['fluxes'][p]/paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        frames=numpy.where(path!=-1)[0]
        path=numpy.array(path[frames], dtype=int)
        print path
        if write==True:
            size=(paths['fluxes'][p]/paths['fluxes'][0])*1000
            traj=Trajectory.load_from_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p), Conf=pdb)
            data=build_metric(dir, pdb, traj)
            dir=modeldir.split('Data')[0]
            for op in sorted(data.keys()):
            #for op in residues:
                pylab.figure()
                pylab.scatter(data['rmsd'], data[op], c=colors[p], alpha=0.7) #, s=size)
                for j in paths['Bottlenecks'][p]:
                    frame=numpy.where(paths['Paths'][p]==j)[0]
                    pylab.scatter(data['rmsd'][frame*20], data[op][frame*20], marker='x', c='k', alpha=0.7, s=50)
                    location=numpy.where(committors==paths['Paths'][p][frame])[0]
                    if location.size:
                        print "path %s state %s bottleneck in committors" % (p, j)
                        print data['rmsd'][frame*20], data[op][frame*20]
                pylab.title('path %s' % p)
                pylab.xlabel('P-L RMSD')
                #pylab.xlabel('P-L COM')
                pylab.ylabel(op)
                pylab.xlim(0,max(data['rmsd'])+5)
                #pylab.ylim(0,max(data[op])+5)
                pylab.show()
Example #15
    def check_container(filename):
        ondisk = io.loadh(filename, deferred=False)
        if n_vtrajs != len(ondisk['hashes']):
            raise ValueError('You asked for {} vtrajs, but your checkpoint \
file has {}'.format(n_vtrajs, len(ondisk['hashes'])))
        if not np.all(ondisk['hashes'] == hashes):
            raise ValueError('Hash mismatch. Are these checkpoint files for \
the right project?')
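For reference, a container that check_container() accepts could be written with a matching saveh call; a sketch with hypothetical values:

import numpy as np
from msmbuilder import io

# store the per-vtraj hashes under the 'hashes' key
io.saveh('checkpoint.h5', hashes=np.array(hashes))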
Example #16
 def dump_count_matrix(self, assignfn, lagtime=1, outfn="count_matrix.txt"):
     from msmbuilder import io
     from msmbuilder import MSMLib
     
     assignments = io.loadh(assignfn, 'arr_0')
     # returns sparse lil_matrix
     counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lagtime,
                                                       sliding_window=True)
     counts = counts.tocoo()
     np.savetxt(outfn,(counts.row, counts.col, counts.data))
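The three rows written above are (row indices, column indices, counts); a sketch of reading the matrix back (filename hypothetical):

import numpy as np
from scipy.sparse import coo_matrix

row, col, vals = np.loadtxt('count_matrix.txt')
counts = coo_matrix((vals, (row.astype(int), col.astype(int))))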
Example #17
def main(assfile, lag, nproc):
    lag=int(lag)
    nproc=int(nproc)
    Assignments=io.loadh(assfile)
    dir=os.path.dirname(assfile)
    newdir='%s/sample-counts' % dir
    proj=Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom=sum(proj.traj_lengths)
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments=Assignments['Data']
    else:
        Assignments=Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag), sliding_window=True)
    Counts=Counts.todense()
    Counts=Counts*(1.0/lag)
    T=numpy.array(Counts)
    frames=numpy.where(T==0)
    T[frames]=1
    Popsample=dict()
    iteration=0
    total_iteration=100/nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain=100 % nproc
    else:
        remain=False
    print "iterating thru tCount samples"
    count=0
    while iteration < 100:
        if count*nproc > 100:
            nproc=remain
        print "sampling iteration %s" % iteration
        Tfresh=T.copy()
        counts=range(0, nproc)
        input = zip([Tfresh]*nproc, [multinom]*nproc, [NumStates]*nproc, counts)
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        print "computed resampled matrices"
        pool.terminate()
        for count_matrix in all:
            #rev_counts, t_matrix, Populations, Mapping=x
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), count_matrix)
           # scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
           # numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
           # numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration+=1
        count+=1
        print "dont with iteration %s" % iteration*nproc
Example #18
def main(modeldir, start, type):
    start=int(start)
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    files=glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    pdb=files[0]
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T=mmread('%s/tProb.mtx' % modeldir)
    startstate=unbound[start]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    steps=100000
    print "on start state %s" % startstate
    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj=numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj=msm_analysis.sample(T, int(startstate),int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)
    print "checking for chkpt file"
    checkfile=glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        movie=Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n=int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile=checkfile[0]
        restart=True
    else:
        restart=False
        n=0
        movie=project.empty_traj()
    while n < len(traj):
        print "on state %s" % n
        state=int(traj[n])
        t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 10)
        if n==0:
            movie['XYZList']=t[0]['XYZList']
            n+=1
            continue
        elif n % 100==0:
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart==True:
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile='%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n+=1
            continue
        elif n!=0:
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            n+=1
            continue
    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
Example #19
def main(modeldir, gensfile, rcut=None):
    mapdata=dict()
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    com=numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    data['com']=com[1:]
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)

    mapdata['rmsd']=[]
    mapdata['com']=[]
    for x in range(0, len(data['rmsd'])):
        if map[x]!=-1:
            mapdata['com'].append(data['com'][x])
            mapdata['rmsd'].append(data['rmsd'][x])

    #RMSD cutoff 
    cutoffs=numpy.arange(1,30,0.5)
    bound_pops=[]
    for type in mapdata.keys(): 
        pylab.figure()
        ohandle=open('%s/%s_msm_frees.dat' % (modeldir, type), 'w')
        data=[]
        for cutoff in cutoffs:
            bound_pops=[]
            for (state, x) in enumerate(mapdata['rmsd']):
                if x < cutoff:
                    bound_pops.append(pops[state])
            ### calculate binding free energy from populations
            if len(bound_pops)==0:
                dG=100
            else:
                bound=numpy.sum(bound_pops)
                unbound=1-bound
                dG=-0.6*numpy.log(bound/unbound)
                #dG=-0.6*numpy.log(bound/(unbound**2))
            
            ### calculate standard state correction, in angstroms here
            boxvolume=244.80*(10**3)
            v0=1600
            corr=-0.6*numpy.log(boxvolume/v0)
            dG_corr=dG+corr
            if rcut is not None and cutoff==float(rcut):
                print cutoff, dG_corr
            data.append(dG_corr)
            ohandle.write('%s\t%s\n' % (cutoff, dG_corr))
        pylab.plot(cutoffs, data, label=type)
        pylab.legend()
        pylab.ylim(-8, (-1)*corr)
    pylab.show()
Example #20
    def load_from(cls, filename):
        """
        Load project from disk

        Parameters
        ----------
        filename : string
            filename_or_file can be a path to a legacy .h5 or current
            .yaml file.

        Returns
        -------
        project : the loaded project object

        """

        rootdir = os.path.abspath(os.path.dirname(filename))

        if filename.endswith('.yaml'):
            with open(filename) as f:
                ondisk = yaml.load(f)
                records = {'conf_filename': ondisk['conf_filename'],
                           'traj_lengths': [],
                           'traj_paths': [],
                           'traj_converted_from': [],
                           'traj_errors': []}

                for trj in ondisk['trajs']:
                    records['traj_lengths'].append(trj['length'])
                    records['traj_paths'].append(trj['path'])
                    records['traj_errors'].append(trj['errors'])
                    records['traj_converted_from'].append(trj['converted_from'])

        elif filename.endswith('.h5'):
            ondisk = io.loadh(filename, deferred=False)
            n_trajs = len(ondisk['TrajLengths'])
            records = {'conf_filename': str(ondisk['ConfFilename'][0]),
                       'traj_lengths': ondisk['TrajLengths'],
                       'traj_paths': [],
                       'traj_converted_from': [ [None] ] * n_trajs,
                       'traj_errors': [None] * n_trajs}

            for i in xrange(n_trajs):
                # this is the convention used in the hdf project format to get the traj paths
                path = os.path.join( ondisk['TrajFilePath'][0], ondisk['TrajFileBaseName'][0] + str(i) + ondisk['TrajFileType'][0] )
                records['traj_paths'].append(path)

        else:
            raise ValueError('Sorry, I can only open files in .yaml'
                             ' or .h5 format: %s' % filename)

        return cls(records, validate=False, project_dir=rootdir)
Example #21
    def check_container(filename):
        ondisk = io.loadh(filename, deferred=False)
        if n_vtrajs != len(ondisk["hashes"]):
            raise ValueError(
                "You asked for {} vtrajs, but your checkpoint \
file has {}".format(
                    n_vtrajs, len(ondisk["hashes"])
                )
            )
        if not np.all(ondisk["hashes"] == hashes):
            raise ValueError(
                "Hash mismatch. Are these checkpoint files for \
the right project?"
            )
Example #22
def main(modeldir):
    proj=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    data=dict()
    data['dist']=numpy.loadtxt('%s/prot_lig_distance.dat' % modeldir, usecols=(1,))
    data['rmsd']=numpy.loadtxt('%s/Gens.rmsd.dat' % modeldir, usecols=(2,))
    com=numpy.loadtxt('%s/Gens.vmd_com.dat' % modeldir, usecols=(1,))
    refcom=com[0]
    data['com']=com[1:]
    data['com']=numpy.array(data['com'])
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    pylab.scatter(data['com'][frames], data['rmsd'][frames])
    pylab.scatter([refcom,], [0,], c='k', marker='x', s=100)
    pylab.xlabel('P-L COM')
    pylab.ylabel('P-L RMSD')
    pylab.show()
Example #23
def main(modeldir):
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T=mmread('%s/tProb.mtx' % modeldir)

    if not os.path.exists('%s/adaptive-states/' % modeldir):
        os.mkdir('%s/adaptive-states/' % modeldir)
    for state in sorted(set(ass['arr_0'].flatten())):
        if state!=-1:
            t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 5)
            for i in range(0, 5):
                print state, i
                (a, b, c) = t[0]['XYZList'].shape
                movie=project.empty_traj()
                movie['XYZList']=numpy.zeros((1, b, c), dtype=numpy.float32)
                movie['XYZList'][0]=t[0]['XYZList'][i]
                movie.save_to_pdb('%s/adaptive-states/state%s-%s.pdb' % (modeldir, int(state), i))
Example #24
    def load_from_disk(cls, filename):
        """Load up a clusterer from disk

        This is useful because computing the Z-matrix
        (done in __init__) is the most expensive part, and assigning is cheap

        Parameters
        ----------
        filename : str
            location to load from

        Raises
        ------
        TODO: Probably raises something if filename doesn't exist?
        """
        data = io.loadh(filename, deferred=False)
        Z, traj_lengths = data['z_matrix'], data['traj_lengths']
        #Next two lines are a hack to fix Serializer bug. KAB
        if np.rank(traj_lengths)==0:
            traj_lengths = [traj_lengths]
        return cls(None, None, precomputed_values=(Z, traj_lengths))
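A usage sketch (the clusterer class and filename are assumptions; the point is that the expensive Z-matrix is reloaded rather than recomputed):

from msmbuilder.clustering import Hierarchical

clusterer = Hierarchical.load_from_disk('ZMatrix.h5')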
Example #25
    def load_from_disk(cls, filename):
        """Load up a clusterer from disk

        This is useful because computing the Z-matrix
        (done in __init__) is the most expensive part, and assigning is cheap

        Parameters
        ----------
        filename : str
            location to load from

        Raises
        ------
        TODO: Probably raises something if filename doesn't exist?
        """
        data = io.loadh(filename, deferred=False)
        Z, traj_lengths = data['z_matrix'], data['traj_lengths']
        #Next two lines are a hack to fix Serializer bug. KAB
        if np.rank(traj_lengths) == 0:
            traj_lengths = [traj_lengths]
        return cls(None, None, precomputed_values=(Z, traj_lengths))
Example #26
 def test_load_2(self):
     "load using deferred=False"
     TestData = io.loadh(self.filename1, deferred=False)['arr_0']
     npt.assert_array_equal(TestData, self.data)
Example #27
def main_learn(args):
    "main method for the learn subcommand"
    s = io.loadh(args.triplets)
    metric_string = ''.join(s['metric'])

    if args.learn_method == 'diagonal':

        # type conversion
        alpha = float(args.alpha)

        rho, weights = lmmdm.optimize_diagonal(s['AtoB'], s['AtoC'], alpha, loss='huber')

        if metric_string == 'dihedral':
            metric = metrics.Dihedral(metric='seuclidean', V=weights)
        elif metric_string == 'drmsd':
            metric = metrics.AtomPairs(metric='seuclidean', V=weights, atom_pairs=s['atom_pairs'])
        elif metric_string == 'rmsd':
            metric = WRMSD(metric='seuclidean', V=weights)
        elif metric_string == 'recipcontact':
            metric = metrics.ContinuousContact(contacts='all', scheme='CA',
                metric='seuclidean', V=weights)
        else:
            raise NotImplementedError('Sorry')


        # save to disk
        pickle.dump(metric, open(args.metric, 'w'))
        print 'Saved metric pickle to {}'.format(args.metric)
        np.save(args.matrix, [weights, rho])
        print 'Saved weights as flat text to {}'.format(args.matrix)

    elif args.learn_method == 'dense':
        initialize = args.initialize
        if args.initialize not in ['euclidean', 'diagonal']:
            try:
                initialize = np.load(initialize)
            except IOError as e:
                print >> sys.stderr, '''-i --initialize must be either "euclidean",
                    "diagonal", or the path to a flat text matrix'''
                print >> sys.stderr, e
                sys.exit(1)

        # type conversion
        alpha, epsilon = map(float, [args.alpha, args.epsilon])
        outer_iterations, inner_iterations = map(int,
            [args.outer_iterations, args.inner_iterations])

        rho, weights = lmmdm.optimize_diagonal(s['AtoB'], s['AtoC'], alpha, loss='huber')
        rho, metric_matrix = lmmdm.optimize_dense(s['AtoB'], s['AtoC'], alpha, rho, np.diag(weights),
            loss='huber', epsilon=1e-5, max_outer_iterations=outer_iterations,
            max_inner_iterations=inner_iterations)

        if metric_string == 'dihedral':
            metric = metrics.Dihedral(metric='mahalanobis', VI=metric_matrix)
        elif metric_string == 'drmsd':
            metric = metrics.AtomPairs(metric='mahalanobis', VI=metric_matrix, atom_pairs=s['atom_pairs'])
        elif metric_string == 'rmsd':
            metric = WRMSD(metric='mahalanobis', VI=metric_matrix)
        elif metric_string == 'recipcontact':
            metric = metrics.ContinuousContact(contacts='all', scheme='CA', metric='mahalanobis',
                VI=metric_matrix)
        else:
            raise NotImplementedError('Sorry')

        # save to disk
        pickle.dump(metric, open(args.metric, 'w'))
        print 'Saved metric pickle to {}'.format(args.metric)
        np.save(args.matrix, [metric_matrix, rho])
        print 'Saved weights, rho to {}'.format(args.matrix)
Example #28
 def test_load_2(self):
     "load using deferred=True"
     deferred = io.loadh(self.filename1, deferred=True)
     npt.assert_array_equal(deferred['arr_0'], self.data)
     deferred.close()
Example #29
 def test_save(self):
     """Save HDF5 to disk and load it back up"""
     io.saveh(self.filename2, self.data)
     TestData = io.loadh(self.filename2, 'arr_0')
     npt.assert_array_equal(TestData, self.data)
Example #30
 def test_load_1(self):
     "Load by specifying array name"
     TestData = io.loadh(self.filename1, 'arr_0')
     npt.assert_array_equal(TestData, self.data)
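Taken together, the tests above cover the loadh access modes; a compact sketch (filename hypothetical):

import numpy as np
from msmbuilder import io

data = np.arange(10)
io.saveh('example.h5', data)                          # stored under 'arr_0'
a = io.loadh('example.h5', 'arr_0')                   # one named array
b = io.loadh('example.h5', deferred=False)['arr_0']   # everything, eagerly
c = io.loadh('example.h5', deferred=True)             # lazy handle
assert np.array_equal(a, c['arr_0'])
c.close()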
Example #31
#!/usr/bin/env python
from msmbuilder import io, MSMLib
from scipy.io import mmwrite
import sys
from msmbuilder import arglib

parser = arglib.ArgumentParser()
parser.add_argument("assignments")
parser.add_argument("lagtime", type=int, default=1)
parser.add_argument("sliding_window", default=False, action="store_true")
parser.add_argument("output", default="tCounts.raw.mtx")
args = parser.parse_args()

try:
    ass = io.loadh(args.assignments)["Data"]
except KeyError:
    ass = io.loadh(args.assignments)["arr_0"]

C = MSMLib.get_count_matrix_from_assignments(ass, lag_time=args.lagtime, sliding_window=args.sliding_window)

print C.sum()
mmwrite(args.output, C)
Example #32
def main(modeldir, gensfile, write=False):
    if not os.path.exists('%s/eig-states/' % modeldir):
        os.mkdir('%s/eig-states/' % modeldir)
    ohandle=open('%s/eig-states/eiginfo.txt' % modeldir, 'w')
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    gens=Trajectory.load_from_lhdf(gensfile)
    T=mmread('%s/tProb.mtx' % modeldir)
    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    com=numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    data['com']=com[1:]
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)

    map_rmsd=[]
    map_com=[]
    for x in range(0, len(data['rmsd'])):
        if map[x]!=-1:
            map_com.append(data['com'][x])
            map_rmsd.append(data['rmsd'][x])
    
    map_com=numpy.array(map_com)
    map_rmsd=numpy.array(map_rmsd)
    T=mmread('%s/tProb.mtx' % modeldir)
    eigs_m=msm_analysis.get_eigenvectors(T, 10)

    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive

    print numpy.shape(eigs_m[1][:,1])
    for i in range(0,1):
        order=numpy.argsort(eigs_m[1][:,i])
        if i==0:
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            ohandle.write('state\tgenstate\tmagnitude\trmsd\tcom\n')
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ",  maxes, values
            maxes=numpy.array(maxes)
            if write==True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
        else:
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ",  maxes, values
            order=numpy.argsort(eigs_m[1][:,i])
            mins=[]
            gen_mins=[]
            values=[]
            ohandle.write('eig%s mins\n' % i)
            for n in order[:5]:
                gen_mins.append(numpy.where(map==n)[0])
                mins.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "mins at ",  mins, values
            if write==True:
                get_structure(modeldir, i, gen_maxes,  maxes, gens, project, ass, type='max')
                get_structure(modeldir, i, gen_mins,  mins, gens, project, ass, type='min')
        pylab.scatter(map_com[order], map_rmsd[order], c=eigs_m[1][order,i], cmap=cm, s=1000*abs(eigs_m[1][order,i]), alpha=0.5)
        print map_com[order][numpy.argmax(eigs_m[1][order,i])]
        print eigs_m[1][order,i][1]
        CB=pylab.colorbar()
        l,b,w,h=pylab.gca().get_position().bounds
        ll, bb, ww, hh=CB.ax.get_position().bounds
        CB.ax.set_position([ll, b+0.1*h, ww, h*0.8])
        CB.set_label('Eig%s Magnitudes' % i)
        ylabel=pylab.ylabel(r'Ligand RMSD to Xtal ($\AA$)')
        xlabel=pylab.xlabel(r'P Active Site - L COM Distance ($\AA$)')
        pylab.legend(loc=8, frameon=False)
        pylab.savefig('%s/2deigs%i_com_prmsd.png' %(modeldir, i),dpi=300)
Example #33
    parser.add_argument('generators', help='''Trajectory file containing
        the structures of each of the cluster centers.  Produced using Cluster.py.''', default='Data/Gens.lh5')


    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()
    
    if -1 in args.states:
        print "Ripping PDBs for all states"
        args.states = 'all'

    if args.conformations_per_state == -1:
        print "Getting all PDBs for each state"
        args.conformations_per_state = 'all'

    atom_indices = np.loadtxt(args.lprmsd_atom_indices, np.int)
    assignments = io.loadh(args.assignments)
    project = Project.load_from(args.project)
    
    if args.lprmsd_permute_atoms == 'None':
        permute_indices = None
    else:
        permute_indices = ReadPermFile(args.lprmsd_permute_atoms)

    if args.lprmsd_alt_indices == 'None':
        alt_indices = None
    else:
        alt_indices = np.loadtxt(args.lprmsd_alt_indices, np.int)

    run(project, assignments, args.conformations_per_state,
         args.states, args.output_dir, args.generators, atom_indices, permute_indices, alt_indices, args.total_memory_gb)
Example #34
    parser.add_argument('conformations_per_state', help='''Number of
        conformations to randomly sample from your data per state''',
                        type=int)
    parser.add_argument('format',
                        help='''Format to output the data in. Note
        that the PDB format is uncompressed and not efficient. For XTC, you can view
        the trajectory using your project's topology file''',
                        default='lh5',
                        choices=['pdb', 'xtc', 'lh5'])
    args = parser.parse_args()

    if args.output == 'XRandomConfs':
        args.output = '%dRandomConfs.%s' % (args.conformations_per_state,
                                            args.format)

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
    project = Project.load_from(args.project)

    random_confs = run(project, assignments, args.conformations_per_state)

    if args.format == 'pdb':
        random_confs.SaveToPDB(args.output)
    elif args.format == 'lh5':
        random_confs.SaveToLHDF(args.output)
    elif args.format == 'xtc':
        random_confs.SaveToXTC(args.output)
    else:
        raise ValueError('Unrecognized format')
Example #35
    parser = arglib.ArgumentParser(description="""Trims assignments whose
distance to their generator exceeds a cutoff. This
discards (expensive!) data, so should only be used if an optimal
clustering is not available.

Note: Check your cluster sizes with CalculateClusterRadii.py to get
a handle on how big they are before you trim. Recall the radius is the
*average* distance to the generator; here you are enforcing the
*maximum* distance.

Output: A trimmed assignments file (Assignments.Trimmed.h5).""")
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    parser.add_argument('distances', default='Data/Assignments.h5.distances')
    parser.add_argument('rmsd_cutoff', help="""distance value at which to trim.
        Data further than this value from its generator will be
        discarded. Note: this is measured with whatever distance metric you used to cluster""", type=float)
    parser.add_argument('output', default='Data/Assignments.Trimmed.h5')
    args = parser.parse_args()
    
    arglib.die_if_path_exists(args.output)
    
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
        distances =  io.loadh(args.distances, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
        distances =  io.loadh(args.distances, 'Data')

    trimmed = run(assignments, distances, args.rmsd_cutoff)
    
    io.saveh(args.output, trimmed)
    logger.info('Saved output to %s', args.output)
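run() itself is not shown in this example; a minimal sketch of the trimming it describes, assuming -1 marks unassigned frames as elsewhere on this page:

import numpy as np

def run(assignments, distances, cutoff):
    # Discard (mark as -1) any frame farther than `cutoff`
    # from its generator.
    trimmed = assignments.copy()
    trimmed[distances > cutoff] = -1
    return trimmed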
Example #36
parser.add_argument('-N',dest='N',default=0,type=int,help='Which eigenvector to look at.')
parser.add_argument('--double',dest='double',default=False,action='store_true',help='Pass this flag if you used msmbuilder.metrics.Dihedrals, which means there is a sin and cosine entry for each angle')
options = parser.parse_args()
 
import numpy as np
from msmbuilder import io, Trajectory
from msmbuilder import metrics
from msmbuilder.geometry import dihedral
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
import os, sys, re
 
pdb = Trajectory.load_trajectory_file( options.pdbFN )

pca = io.loadh( options.pcaFN )

decInd = np.argsort( pca['vals'] )[::-1]


v0 = np.abs(pca['vecs'][:,decInd][:,options.N])
if options.double:
    if v0.shape[0] % 2:
        print "There are an odd number of entries, so --double should not be passed here, or something else has gone wrong."
        exit()
 
    n0 = v0.shape[0]
    v0 = v0[:n0/2] + v0[n0/2:]


Example #37
def main(coarse_val, orig_val, rcut):
    data=dict()
    data['coarse']=dict()
    data['orig']=dict()
    dirs=dict()
    dirs['coarse']='./d%s' % coarse_val
    dirs['orig']='./d%s' % orig_val
    proj=Project.load_from('ProjectInfo.yaml')
    types=['ass', 'rmsd', 'dist', 'gens']
    for key in ['coarse', 'orig']:
        for type in types:
            if 'ass' in type:
                ass=io.loadh('%s/Data/Assignments.h5' % dirs[key])
                data[key][type]=ass['arr_0']
            elif 'dist' in type:
                ass=io.loadh('%s/Data/Assignments.h5.distances' % dirs[key])
                data[key][type]=ass['arr_0']
            elif 'rmsd' in type:
                rmsd=numpy.loadtxt('%s/Gens.rmsd.dat' % dirs[key])
                data[key][type]=rmsd
            elif 'gens' in type:
                gens=Trajectory.load_from_lhdf('%s/Gens.lh5' % dirs[key])
                data[key][type]=gens
    unboundmap=dict()
    boundmap=dict()
    #unboundstates=dict()
    #unboundrmsd=dict()
    # build map dict for orig to coarse unbound states, bound will stay same
    newass=-1*numpy.ones(( data['orig']['ass'].shape[0], data['orig']['ass'].shape[1]), dtype=int)
    for j in range(0, data['orig']['ass'].shape[0]):
        for (n,i) in enumerate(data['orig']['ass'][j]):
            # if unbound
            if i != -1:
                if data['orig']['rmsd'][i] > float(rcut):
                    state=data['coarse']['ass'][j][n]
                    newass[j][n]=state+10000
                else:
                    newass[j][n]=i
    count=0
    unique=set(newass.flatten())
    boundmap=dict()
    unboundmap=dict()
    for x in unique:
        locations=numpy.where(newass==x)
        newass[locations]=count
        if x >= 10000:
            unboundmap[count]=(x-10000)
        else:
            boundmap[count]=x
        count+=1
    io.saveh('%s/Coarsed_r%s_Assignments.h5' % (dirs['orig'], rcut), newass)
    subdir='%s/Coarsed_r%s_gen/' % (dirs['orig'], rcut)
    if not os.path.exists(subdir):
        os.mkdir(subdir)
    ohandle=open('%s/Coarsed%s_r%s_Gens.rmsd.dat' % (subdir, coarse_val, rcut), 'w')
    b=data['orig']['gens']['XYZList'].shape[1]
    c=data['orig']['gens']['XYZList'].shape[2]
    dicts=[boundmap, unboundmap]
    names=['bound', 'unbound']
    labels=['orig', 'coarse']
    total=len(boundmap.keys()) + len(unboundmap.keys())
    structure=proj.empty_traj()
    structure['XYZList']=numpy.zeros((total, b, c), dtype='float32')
    count=0
    for (name, label, mapdata) in zip( names, labels, dicts):
        print "writing coarse gen %s out of %s pdbs" % (count, len(mapdata.keys()))
        for i in sorted(mapdata.keys()):
            macro=mapdata[i]
            structure['XYZList'][count]=data[label]['gens']['XYZList'][macro]
            ohandle.write('%s\t%s\t%s\n' % (name, count, data[label]['rmsd'][macro]))
            print name, count
            count+=1
    structure.save_to_xtc('%s/Coarsed%s_r%s_Gens.xtc' % (subdir, coarse_val, rcut))
Example #38
import numpy as np
from scipy.optimize import fsolve
from msmbuilder import io
import argparse
import matplotlib
from matplotlib.pyplot import *
import IPython

parser = argparse.ArgumentParser()
parser.add_argument('-d', dest='data', help='data for each state')
parser.add_argument('-f', dest='eig', help='eigenvector value of each state')

args = parser.parse_args()

M = io.loadh(args.data, 'HB_maps')
if len(M.shape) > 2:
    M = M.reshape((M.shape[0], -1))

M = M - M.mean(0)
print M.shape

eig = io.loadh(args.eig, 'arr_0')

b = eig / np.sqrt(eig.dot(eig) / eig.shape[0])
b = np.reshape(b, (-1, 1))

sigma = M.T.dot(M)
pca_vals, pca_vecs = np.linalg.eig(sigma)

ind = np.where(pca_vals > 1E-8)[0]
Example #39
    parser.add_argument('output', help="""The name of the RandomConfs
        trajectory (.lh5) to write. XRandomConfs.lh5, where X=Number of
        Conformations.""", default='XRandomConfs')
    parser.add_argument('conformations_per_state', help='''Number of
        conformations to randomly sample from your data per state''', type=int)
    parser.add_argument('format', help='''Format to output the data in. Note
        that the PDB format is uncompressed and not efficient. For XTC, you can view
        the trajectory using your project's topology file''', default='lh5',
        choices=['pdb', 'xtc', 'lh5'])    
    args = parser.parse_args()
    
    if args.output == 'XRandomConfs':
            args.output = '%dRandomConfs.%s' % (args.conformations_per_state, args.format)
    
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
    project = Project.load_from(args.project)
    
    random_confs = run(project, assignments, args.conformations_per_state)
        
    if args.format == 'pdb':
        random_confs.SaveToPDB(args.output)
    elif args.format == 'lh5':
        random_confs.SaveToLHDF(args.output)
    elif args.format == 'xtc':
        random_confs.SaveToXTC(args.output)
    else:
        raise ValueError('Unrecognized format')
Example #40
def get_implied_timescales_helper(args):
    """Helper function to compute implied timescales with multiprocessing

    Does not work in interactive mode

    Parameters
    ----------
    assignments_fn : str
        Path to Assignments.h5 file on disk
    n_states : int
        Number of states
    lag_time : list
        List of lag times to calculate the timescales at
    n_implied_times : int, optional
        Number of implied timescales to calculate at each lag time
    sliding_window : bool, optional
        Use sliding window
    trimming : bool, optional
        Use ergodic trimming
    symmetrize : {'MLE', 'Transpose', None}
        Symmetrization method

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """

    assignments_fn, lag_time, n_implied_times, sliding_window, trimming, symmetrize = args

    try:
        assignments = io.loadh(assignments_fn, 'arr_0')
    except KeyError:
        assignments = io.loadh(assignments_fn, 'Data')

    try:
        from msmbuilder import MSMLib
        t_matrix = MSMLib.build_msm(assignments, lag_time, symmetrize,
                                    sliding_window, trimming)[1]
    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    #TJL: set Epsilon high, should not raise err here
    n_eigenvectors = n_implied_times + 1
    e_values = get_eigenvectors(t_matrix, n_eigenvectors, epsilon=1)[0]

    # make sure to leave off equilibrium distribution
    lag_times = lag_time * np.ones((n_implied_times))
    imp_times = -lag_times / np.log(e_values[1:n_eigenvectors])

    # save intermediate result in case of failure
    # res = np.zeros((n_implied_times, 2))
    # res[:,0] = lag_times
    # res[:,1] = np.real(imp_times)

    return (lag_times, imp_times)
Example #41
def get_implied_timescales_helper(args):
    """Helper function to compute implied timescales with multiprocessing

    Does not work in interactive mode

    Parameters
    ----------
    assignments_fn : str
        Path to Assignments.h5 file on disk
    n_states : int
        Number of states
    lag_time : list
        List of lag times to calculate the timescales at
    n_implied_times : int, optional
        Number of implied timescales to calculate at each lag time
    sliding_window : bool, optional
        Use sliding window
    trimming : bool, optional
        Use ergodic trimming
    symmetrize : {'MLE', 'Transpose', None}
        Symmetrization method

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """
    
    assignments_fn, lag_time, n_implied_times, sliding_window, trimming, symmetrize = args
    
    try:
        assignments = io.loadh(assignments_fn, 'arr_0')
    except KeyError:
        assignments = io.loadh(assignments_fn, 'Data')
    
    try:
        from msmbuilder import MSMLib
        
        counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lag_time, 
                                                          sliding_window=sliding_window)
        rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(counts, symmetrize, trimming)

    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    #TJL: set Epsilon high, should not raise err here
    n_eigenvectors = n_implied_times + 1
    e_values = get_eigenvectors(t_matrix, n_eigenvectors, epsilon=1)[0]

    # make sure to leave off equilibrium distribution
    lag_times = lag_time * np.ones((n_implied_times))
    imp_times = -lag_times / np.log(e_values[1: n_eigenvectors])

    # save intermediate result in case of failure
    # res = np.zeros((n_implied_times, 2))
    # res[:,0] = lag_times
    # res[:,1] = np.real(imp_times)

    return (lag_times, imp_times)
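A hypothetical driver for this helper, fanning it out over several lag times with multiprocessing (the tuple order matches the unpacking inside the helper; file name and parameter values are assumptions):

import multiprocessing

lag_times = range(5, 55, 5)
arg_tuples = [('Data/Assignments.h5', lt, 10, True, True, 'MLE')
              for lt in lag_times]
pool = multiprocessing.Pool(processes=4)
results = pool.map(get_implied_timescales_helper, arg_tuples)
pool.close()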
Example #42
dcds=numpy.loadtxt('mapped_trajs.txt', usecols=(0,), dtype=str)
xtcs=numpy.loadtxt('mapped_trajs.txt', usecols=(1,), dtype=str)
mapping=dict()
for (i,j) in zip(dcds, xtcs):
    name=i.split('_nowat_')[0]
    mapping[j]=name
    
#for j in mapping.keys():
#    total=mapping[j]+bw[j]
#    if total!=totals[j]:
#        print "problem"
#        import pdb
#        pdb.set_trace()

ohandle=open('d6/msml1000_coarse_r10_d20/traj_frames.txt', 'w')
ass=io.loadh('d6/msml1000_coarse_r10_d20/Assignments.Fixed.h5')
mapfile=numpy.loadtxt('d6/msml1000_coarse_r10_d20/Mapping.dat')
sample=False
for state in sorted(set(ass['arr_0'].flatten())):
    if state!=-1:
        traj=numpy.where(ass['arr_0']==state)[0]
        frames=numpy.where(ass['arr_0']==state)[1]
        indices=numpy.random.random_integers(0, len(traj)-1, len(traj))
        for ind in indices:
            traj_ind=traj[ind]
            mapped_traj=mapping['trj%s' % traj_ind]
            if mapped_traj in bw.keys():
                minval=bw[mapped_traj]
            else:
                minval=0
            location=numpy.where((traj==traj_ind)&(frames>minval))[0]
Example #43
import sys
from time import time
import logging

import numpy as np
from msmbuilder import io, Project, Trajectory
from msmbuilder.geometry import contact

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
sh = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(fmt="%(asctime)s - %(message)s", datefmt="%H:%M:%S")
sh.setFormatter(formatter)
logger.addHandler(sh)
logger.propagate = False

logger.info("start")
Proj = Project.load_from(args.proj_FN)
logger.info("loaded project info")
try:
    Ass = io.loadh(args.ass_FN)["arr_0"]
except KeyError:
    Ass = io.loadh(args.ass_FN)["Data"]

pdb = Trajectory.load_from_pdb(Proj.conf_filename)

which = np.loadtxt(args.which).astype(int)

distance_cutoff = 0.32
angle_cutoff = 120


def get_hb(traj):

    # get accH - donor distance:
    dists = contact.atom_distances(traj["XYZList"], atom_contacts=which[:, 1:])
Example #44
import sys, os
import scipy.io
from msmbuilder import MSMLib
from msmbuilder.io import loadh, saveh

try:
    Assignments = loadh(sys.argv[1], 'arr_0').astype(int)
except KeyError:
    Assignments = loadh(sys.argv[1], 'Data').astype(int)
NumStates = max(Assignments.flatten()) + 1
LagTime = int(sys.argv[2])
Counts = MSMLib.get_count_matrix_from_assignments(Assignments,
                                                  n_states=NumStates,
                                                  lag_time=LagTime,
                                                  sliding_window=True)
scipy.io.mmwrite(sys.argv[3], Counts)
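A usage sketch for this script (script name hypothetical):

#   python DumpCounts.py Data/Assignments.Fixed.h5 5 tCounts.mtx
# sys.argv[1] = assignments file, sys.argv[2] = lag time,
# sys.argv[3] = output matrix-market file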