Ejemplo n.º 1
0
def run(project, atom_indices=None, traj_fn='all'):
    """Compute the solvent accessible surface area for one or all trajectories.

    Parameters
    ----------
    project : project object
        Must provide ``load_conf()``, ``n_trajs``, ``traj_lengths`` and
        ``traj_filename(index)``.
    atom_indices : optional
        Forwarded unchanged as ``AtomIndices`` to
        ``Trajectory.enum_chunks_from_lhdf``.
    traj_fn : str
        ``'all'`` (case-insensitive) to process every trajectory of the
        project, otherwise the path of a single trajectory file.

    Returns
    -------
    SASA : np.ndarray
        For ``'all'``: an array of shape
        ``(n_trajs, max(traj_lengths), n_atoms)`` whose entries beyond the end
        of each trajectory keep the fill value -1. For a single file: the
        per-frame results of ``asa.calculate_asa`` wrapped in ``np.array``.
    """
    # NOTE(review): n_atoms is taken from the full conformation; if
    # atom_indices restricts the atoms that are loaded, the pre-allocated
    # array may not match the per-frame results -- confirm with callers.
    n_atoms = project.load_conf()['XYZList'].shape[1]

    if traj_fn.lower() == 'all':
        # -1 marks frames that do not exist in the shorter trajectories.
        SASA = np.ones(
            (project.n_trajs, np.max(project.traj_lengths), n_atoms)) * -1

        for traj_ind in xrange(project.n_trajs):
            logger.info("Working on Trajectory %d", traj_ind)
            # Use a separate name so the ``traj_fn`` argument is not clobbered.
            traj_path = project.traj_filename(traj_ind)
            traj_asa = []
            for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                    traj_path, AtomIndices=atom_indices):
                traj_asa.extend(
                    asa.calculate_asa(traj_chunk, n_sphere_points=24))
            SASA[traj_ind, 0:project.traj_lengths[traj_ind]] = traj_asa

    else:
        # NOTE(review): this branch relies on calculate_asa's default
        # n_sphere_points while the 'all' branch passes 24 -- confirm intended.
        traj_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                traj_fn, AtomIndices=atom_indices):
            traj_asa.extend(asa.calculate_asa(traj_chunk))

        SASA = np.array(traj_asa)

    return SASA
Ejemplo n.º 2
0
def plot_gpu_cmd_correlation():
    """Plot CPU and GPU RMSD-to-first-frame curves and their correlation.

    The trajectory ``ww_1`` is loaded twice (once per implementation), the
    RMSD of every frame to frame 0 is computed with ``GPURMSD`` and ``RMSD``,
    and three panels are drawn in the top row of a 2x3 grid: the CPU curve,
    a GPU-vs-CPU scatter plot, and the GPU curve. The figure is shown
    interactively with ``pp.show()``.
    """
    # Each implementation gets its own freshly loaded trajectory object.
    traj1 = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)
    traj1_copy = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)

    def gpudist(t):
        """Return the GPU RMSD from frame 0 of ``t`` to all of its frames."""
        gpurmsd = GPURMSD()
        pt = gpurmsd.prepare_trajectory(t)
        gpurmsd._gpurmsd.print_params()
        return gpurmsd.one_to_all(pt, pt, 0)

    def cpudist(t):
        """Return the CPU RMSD from frame 0 of ``t`` to all of its frames."""
        rmsd = RMSD()
        pt = rmsd.prepare_trajectory(t)
        return rmsd.one_to_all(pt, pt, 0)

    g1 = gpudist(traj1)
    c1 = cpudist(traj1_copy)

    pp.subplot(231)
    pp.plot(c1)
    pp.title('cpu rmsd drift along traj')
    pp.xlabel('frame index')
    # The RMSD goes on the y axis; a second xlabel call would overwrite
    # the 'frame index' label set just above.
    pp.ylabel('cpurmsd($X_{0}$, $X_{frame_index}$)')

    pp.subplot(232)
    pp.scatter(g1, c1)
    pp.xlabel('gpu rmsd')
    pp.ylabel('cpu rmsd')

    pp.subplot(233)
    pp.plot(g1)
    pp.title('gpu rmsd drift along traj')
    pp.xlabel('frame index')
    pp.ylabel('gpurmsd($X_{0}$, $X_{frame_index}$)')

    # The lower row (subplots 234-236) was intended for a second,
    # pre-aligned trajectory (ww_2) and is currently disabled, as are
    # pp.subplots_adjust(hspace=0.4) and saving to 'gpucpu_correlation.png'.
    pp.show()
Ejemplo n.º 3
0
def run(project, atom_indices=None, traj_fn = 'all'):
    """Calculate solvent accessible surface areas for a project or one file.

    Parameters
    ----------
    project : project object
        Needs ``load_conf()``, ``n_trajs``, ``traj_lengths`` and
        ``traj_filename(index)``.
    atom_indices : optional
        Passed through as ``AtomIndices`` to
        ``Trajectory.enum_chunks_from_lhdf``.
    traj_fn : str
        ``'all'`` (any letter case) processes every project trajectory; any
        other value is treated as the path of a single trajectory file.

    Returns
    -------
    SASA : np.ndarray
        With ``'all'``: shape ``(n_trajs, max(traj_lengths), n_atoms)``, padded
        with -1 after the last frame of each trajectory. Otherwise the list of
        per-frame ``asa.calculate_asa`` results converted with ``np.array``.
    """
    # NOTE(review): n_atoms comes from the unrestricted conformation; when
    # atom_indices is given the stored rows may have a different width --
    # confirm against the loader's behaviour.
    n_atoms = project.load_conf()['XYZList'].shape[1]

    if traj_fn.lower() != 'all':
        # NOTE(review): default n_sphere_points here vs. 24 below -- confirm.
        frames_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, AtomIndices=atom_indices):
            frames_asa.extend(asa.calculate_asa(traj_chunk))
        return np.array(frames_asa)

    # Pre-fill with -1 so that padding frames are recognisable.
    SASA = np.ones((project.n_trajs, np.max(project.traj_lengths), n_atoms)) * -1

    for traj_ind in xrange(project.n_trajs):
        logger.info("Working on Trajectory %d", traj_ind)
        # A dedicated local keeps the ``traj_fn`` argument intact.
        current_fn = project.traj_filename(traj_ind)
        frames_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(current_fn, AtomIndices=atom_indices):
            frames_asa.extend(asa.calculate_asa(traj_chunk, n_sphere_points=24))
        SASA[traj_ind, 0:project.traj_lengths[traj_ind]] = frames_asa

    return SASA
Ejemplo n.º 4
0
def main(modeldir, genfile,  type, write=False):
    """Report the leading TPT path(s) of a model and optionally plot them.

    Written for Python 2 (uses print statements).

    Parameters
    ----------
    modeldir : str
        Directory holding ``Populations.dat``, ``Mapping.dat``,
        ``commitor_states.txt`` and the ``tpt-rmsd-<type>`` subdirectory.
        The part of the path before ``'Data'`` is searched for the crystal
        structure PDB (``fkbp*xtal*pdb``).
    genfile : unused
        Not referenced anywhere in the body.
    type : str
        Name used to build the ``tpt-rmsd-<type>`` paths. The values
        'strict', 'super-strict', 'medium' and 'loose' also select ``ref``,
        which is not used afterwards.
    write : bool
        When True, the sampled path trajectory is loaded and one scatter plot
        per key of ``build_metric``'s result is shown.
    """
    data=dict()
    # NOTE: pops, map and this first `frames` are loaded but not used below.
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]

    unbound=numpy.loadtxt('%s/tpt-rmsd-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound=numpy.loadtxt('%s/tpt-rmsd-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)

    # Everything before 'Data' in modeldir is taken as the base directory.
    dir=modeldir.split('Data')[0]
    name=glob.glob('%s/fkbp*xtal*pdb' % dir)
    pdb=Trajectory.load_from_pdb(name[0])
    paths=io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))

    committors=numpy.loadtxt('%s/commitor_states.txt' % modeldir, dtype=int)
    # Six colours repeated 40 times so that indexing by path number works.
    colors=['red', 'orange', 'green', 'cyan', 'blue', 'purple']
    colors=colors*40
    # NOTE(review): ref stays undefined for any other type and is never read.
    if type=='strict':
        ref=5
    elif type=='super-strict':
        ref=3
    elif type=='medium':
        ref=10
    elif type=='loose':
        ref=15
    # Only the first path is processed; the three-path variant is disabled.
    #for p in range(0, 3):
    for p in range(0, 1):
        path=paths['Paths'][p]
        print "Bottleneck", paths['Bottlenecks'][p]
        # Flux of this path relative to the first path.
        flux=paths['fluxes'][p]/paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        # Drop the -1 entries of the path before printing it.
        frames=numpy.where(path!=-1)[0]
        path=numpy.array(path[frames], dtype=int)
        print path
        if write==True:
            # NOTE: size is only referenced by the disabled `s=size` argument.
            size=(paths['fluxes'][p]/paths['fluxes'][0])*1000
            traj=Trajectory.load_from_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p), Conf=pdb)
            data=build_metric(dir, pdb, traj)
            dir=modeldir.split('Data')[0]
            for op in sorted(data.keys()):
            #for op in residues:
                pylab.figure()
                pylab.scatter(data['rmsd'], data[op], c=colors[p], alpha=0.7) #, s=size)
                for j in paths['Bottlenecks'][p]:
                    # Position of the bottleneck state within the path.
                    frame=numpy.where(paths['Paths'][p]==j)[0]
                    # presumably 20 sampled frames per path state (file is
                    # named *_sample20.xtc), hence the *20 -- TODO confirm
                    pylab.scatter(data['rmsd'][frame*20], data[op][frame*20], marker='x', c='k', alpha=0.7, s=50)
                    location=numpy.where(committors==paths['Paths'][p][frame])[0]
                    if location.size:
                        print "path %s state %s bottleneck in committors" % (p, j)
                        print data['rmsd'][frame*20], data[op][frame*20]
                pylab.title('path %s' % p)
                pylab.xlabel('P-L RMSD')
                #pylab.xlabel('P-L COM')
                pylab.ylabel(op)
                pylab.xlim(0,max(data['rmsd'])+5)
                #pylab.ylim(0,max(data[op])+5)
                pylab.show()
Ejemplo n.º 5
0
def plot_gpu_cmd_correlation():
    """Compare the GPU and CPU RMSD implementations on trajectory ``ww_1``.

    Both implementations compute the RMSD of every frame to frame 0. The
    results are drawn in the top row of a 2x3 subplot grid (CPU curve,
    GPU-vs-CPU scatter, GPU curve) and displayed with ``pp.show()``.
    """
    # Load the same file twice so each metric prepares its own object.
    traj1 = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)
    traj1_copy = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)

    def gpudist(t):
        """RMSD of all frames of ``t`` to its frame 0, via ``GPURMSD``."""
        gpurmsd = GPURMSD()
        pt = gpurmsd.prepare_trajectory(t)
        gpurmsd._gpurmsd.print_params()
        return gpurmsd.one_to_all(pt, pt, 0)

    def cpudist(t):
        """RMSD of all frames of ``t`` to its frame 0, via ``RMSD``."""
        rmsd = RMSD()
        pt = rmsd.prepare_trajectory(t)
        return rmsd.one_to_all(pt, pt, 0)

    g1 = gpudist(traj1)
    c1 = cpudist(traj1_copy)

    pp.subplot(231)
    pp.plot(c1)
    pp.title('cpu rmsd drift along traj')
    pp.xlabel('frame index')
    # Label the y axis; calling xlabel again would replace 'frame index'.
    pp.ylabel('cpurmsd($X_{0}$, $X_{frame_index}$)')

    pp.subplot(232)
    pp.scatter(g1, c1)
    pp.xlabel('gpu rmsd')
    pp.ylabel('cpu rmsd')

    pp.subplot(233)
    pp.plot(g1)
    pp.title('gpu rmsd drift along traj')
    pp.xlabel('frame index')
    pp.ylabel('gpurmsd($X_{0}$, $X_{frame_index}$)')

    # Subplots 234-236 (the same three panels for a pre-aligned second
    # trajectory, ww_2) are disabled, together with
    # pp.subplots_adjust(hspace=0.4) and pp.savefig('gpucpu_correlation.png').
    pp.show()
Ejemplo n.º 6
0
def test_xtc_dcd():
    """The XTC and DCD fixtures of RUN00_frame0 must give equal coordinates."""
    topology = get("native.pdb", just_filename=True)
    xtc_path = get('RUN00_frame0.xtc', just_filename=True)
    dcd_path = get('RUN00_frame0.dcd', just_filename=True)

    from_xtc = Trajectory.load_from_xtc(xtc_path, topology)
    from_dcd = Trajectory.load_from_dcd(dcd_path, topology)

    # Coordinates are compared to four decimal places.
    eq(from_xtc["XYZList"], from_dcd["XYZList"], decimal=4)
Ejemplo n.º 7
0
def test_asa_3():
    """Check calculate_asa on trj0.xtc against the stored g_sas reference."""
    reference = np.loadtxt(os.path.join(reference_dir(), 'g_sas_ref.dat'))
    native = Trajectory.load_from_pdb(
        os.path.join(fixtures_dir(), 'native.pdb'))

    xtc_path = os.path.join(fixtures_dir(), 'trj0.xtc')
    traj = Trajectory.load_trajectory_file(xtc_path, Conf=native)
    computed = calculate_asa(traj, probe_radius=0.14, n_sphere_points=960)

    # gromacs' g_sas uses a slightly different algorithm than the one
    # implemented here, so agreement is only expected to two decimals --
    # see the comments in src/python/geometry/asa.py or the readme file
    # src/ext/asa/README.txt for details.
    npt.assert_array_almost_equal(computed, reference, decimal=2)
Ejemplo n.º 8
0
    def load_frame(self, traj_index, frame_index):
        """Load a single frame, or a list of frames, from the project.

        Parameters
        ----------
        traj_index : int, [int]
            Index or indices of the trajectories to pull from
        frame_index : int, [int]
            Index or indices of the frames to pull from

        Returns
        -------
        traj : msmbuilder.Trajectory
            The project's conformation with its 'XYZList' replaced by the
            requested frame(s).

        Raises
        ------
        ValueError
            If the (non-scalar) indices are not 1D arrays of equal shape, or
            if a requested frame lies beyond the end of its trajectory.

        Example
        -------
        >>> project = Project.load_from('ProjectInfo.yaml')
        >>> foo = project.load_frame(1,10)
        >>> bar = Trajectory.read_frame(TrajFilename=project.traj_filename(1),
        ...     WhichFrame=10)
        >>> np.all(foo['XYZList'] == bar)
        True
        """

        if np.isscalar(traj_index) and np.isscalar(frame_index):
            # Single frame: wrap it so the result still has a frame axis.
            single = Trajectory.read_frame(
                TrajFilename=self.traj_filename(traj_index),
                WhichFrame=frame_index)
            coords = np.array([single])
        else:
            traj_index = np.array(traj_index)
            frame_index = np.array(frame_index)
            same_shape = np.all(traj_index.shape == frame_index.shape)
            if traj_index.ndim != 1 or not same_shape:
                raise ValueError('traj_index and frame_index must be 1D and have the same length')

            collected = []
            for t_ind, f_ind in zip(traj_index, frame_index):
                available = self.traj_lengths[t_ind]
                if f_ind >= available:
                    raise ValueError('traj %d too short (%d) to contain a frame %d' % (t_ind, available, f_ind))
                collected.append(
                    Trajectory.read_frame(
                        TrajFilename=self.traj_filename(t_ind),
                        WhichFrame=f_ind))
            coords = np.array(collected)

        # Reuse the project conformation as the container for the frames.
        conf = self.load_conf()
        conf['XYZList'] = coords

        return conf
Ejemplo n.º 9
0
def test1():
    """
    Demonstrate how to obtain the helix counts of a trajectory: load an
    LHDF trajectory, run compute_numhelix_trajectory on it and print the
    length of the result.
    """
    traj_path = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj34.lh5'
    helix_counts = compute_numhelix_trajectory(
        Trajectory.LoadFromLHDF(traj_path))
    print(len(helix_counts))
Ejemplo n.º 10
0
def run(project, assignments, conformations_per_state, states, output_dir):
    """Save randomly chosen conformations of each requested state as PDB files.

    Parameters
    ----------
    project : project object
        Must provide ``empty_traj()`` and ``traj_filename(index)``.
    assignments : 2D array
        ``assignments[i, j]`` is the state of frame ``j`` in trajectory ``i``.
    conformations_per_state : int
        Maximum number of conformations written per state.
    states : iterable of int or the string "all"
        States to export; "all" expands to ``0 .. assignments.max()``.
    output_dir : str
        Target directory, created if it does not exist. Files are named
        ``State<state>-<n>.pdb``; existing files are left untouched.

    Raises
    ------
    ValueError
        If a requested state has no assigned frames.
    """
    # Only compare against "all" for scalars: comparing an array of states
    # with a string is elementwise (or ambiguous) in numpy.
    if np.isscalar(states) and states == "all":
        states = np.arange(assignments.max() + 1)

    # Map each state to the (trajectory, frame) pairs assigned to it.
    inverse_assignments = defaultdict(list)
    for i in xrange(assignments.shape[0]):
        for j in xrange(assignments.shape[1]):
            inverse_assignments[assignments[i, j]].append((i, j))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    empty_traj = project.empty_traj()
    for s in states:
        members = inverse_assignments[s]
        if not members:
            raise ValueError('No assignments to state! %s' % s)

        # Shuffle in place, then take the first few as the random sample.
        random.shuffle(members)
        if len(members) < conformations_per_state:
            logger.warning('Not enough assignments in state %s', s)
        confs = members[:conformations_per_state]

        for i, (traj_ind, frame) in enumerate(confs):
            outfile = os.path.join(output_dir, 'State%d-%d.pdb' % (s, i))
            if os.path.exists(outfile):
                logger.warning('Skipping %s. Already exists', outfile)
                continue

            logger.info('Saving state %d (traj %d, frame %d) as %s', s,
                        traj_ind, frame, outfile)
            traj_filename = project.traj_filename(traj_ind)
            xyz = Trajectory.read_frame(traj_filename, frame)
            empty_traj['XYZList'] = np.array([xyz])
            empty_traj.save_to_pdb(outfile)
Ejemplo n.º 11
0
    def __init__(self, structure_or_filename, metric, max_distance):
        """Create an explosion validator

        Checks the distance from every frame to a structure and
        watches for things that are too far away

        Parameters
        ----------
        structure_or_filename : {msmbuilder.Trajectory, str}
            The structure to measure distances to, either as a trajectory (the first
            frame is the only one that counts) or a path to a trajectory
            on disk that can be loaded
        metric : msmbuilder distance metric
            Metric by which you want to measure distance
        max_distance : float
            The threshold distance, above which a ValidationError
            will be thrown

        Raises
        ------
        TypeError
            If `structure_or_filename` is neither a Trajectory nor a string.
        """

        if isinstance(structure_or_filename, Trajectory):
            conf = structure_or_filename
        elif isinstance(structure_or_filename, basestring):
            conf = Trajectory.load_trajectory_file(structure_or_filename)
        else:
            # Without this branch `conf` would be unbound further down and
            # the caller would get an unrelated UnboundLocalError.
            raise TypeError(
                'structure_or_filename must be a Trajectory or a path, got %r'
                % type(structure_or_filename))

        self.max_distance = max_distance
        self.metric = metric
        # Prepare the reference once so it can be reused for every check.
        self._pconf = self.metric.prepare_trajectory(conf)
Ejemplo n.º 12
0
    def __init__(self, structure_or_filename, metric, max_distance):
        """Create an explosion validator

        Checks the distance from every frame to a structure and
        watches for things that are too far away

        Parameters
        ----------
        structure_or_filename : {msmbuilder.Trajectory, str}
            The structure to measure distances to, either as a trajectory (the first
            frame is the only one that counts) or a path to a trajectory
            on disk that can be loaded
        metric : msmbuilder distance metric
            Metric by which you want to measure distance
        max_distance : float
            The threshold distance, above which a ValidationError
            will be thrown

        Raises
        ------
        TypeError
            If `structure_or_filename` is neither a Trajectory nor a string.
        """

        if isinstance(structure_or_filename, Trajectory):
            reference = structure_or_filename
        elif isinstance(structure_or_filename, basestring):
            reference = Trajectory.load_trajectory_file(structure_or_filename)
        else:
            # Fail with a clear message instead of an UnboundLocalError when
            # the reference structure is used below.
            raise TypeError(
                'structure_or_filename must be a Trajectory or a path, got %r'
                % type(structure_or_filename))

        self.max_distance = max_distance
        self.metric = metric
        # The prepared reference is kept for later distance computations.
        self._pconf = self.metric.prepare_trajectory(reference)
Ejemplo n.º 13
0
def create_hcstrings_states(Assignments, outfile='HCstrings_states.txt'):
    """Collect the 'HCs' strings of every state and pickle them to `outfile`.

    Parameters
    ----------
    Assignments
        Passed to ``hct.get_StatesAssignments``; the result is used as a
        mapping ``state -> {trajectory id -> frame indices}``.
    outfile : str
        Destination of the pickled ``{state: [HC strings]}`` dictionary. An
        existing file is first renamed to ``outfile + '.bck'``.
    """
    SA = hct.get_StatesAssignments(Assignments)
    states = SA.keys()
    HCstrings_states = {}
    for n, state in enumerate(states, 1):
        print("Get HC strings for state %d/%d" % (n, len(states)))
        HCstrings_states[state] = []
        for trajid in SA[state].keys():
            TrajFile = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj%s_hc.lh5' % trajid
            Traj = Trajectory.LoadFromLHDF(TrajFile)
            HCstrings_states[state] += [
                Traj['HCs'][i] for i in SA[state][trajid]
            ]

    if os.path.exists(outfile):
        # Rename directly instead of building an `mv` shell command, which
        # breaks on file names containing spaces or shell metacharacters.
        os.rename(outfile, outfile + '.bck')
    print("Write HCstings of states into %s" % outfile)
    # Binary mode is what pickle expects; `with` closes the file on errors.
    with open(outfile, 'wb') as HCfile:
        pickle.dump(HCstrings_states, HCfile)
    print("Done.")
Ejemplo n.º 14
0
def test_traj_0():
    """Loading with Stride/AtomIndices must match restricting after loading."""
    aind = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)

    # Reference: load everything, then restrict atoms and subsample frames.
    expected = get('Trajectories/trj0.lh5')
    expected.restrict_atom_indices(aind)
    expected['XYZList'] = expected['XYZList'][::stride]

    # Under test: let the loader apply the stride and the atom selection.
    lh5_path = get('Trajectories/trj0.lh5', just_filename=True)
    loaded = Trajectory.load_from_lhdf(lh5_path, Stride=stride,
                                       AtomIndices=aind)

    # make sure we loaded the right number of atoms
    assert loaded['XYZList'].shape[1] == len(aind)

    for key in loaded.keys():
        if key == 'SerializerFilename':
            continue

        if key == 'IndexList':
            # Compared row by row.
            for row, expected_row in zip(loaded[key], expected[key]):
                eq(row, expected_row)
        else:
            eq(loaded[key], expected[key])
Ejemplo n.º 15
0
    def test(self):
        """Draw random confs per state, save them in all styles, compare."""
        from msmbuilder.scripts.SaveStructures import save

        project = get('ProjectInfo.yaml')
        assignments = get('Assignments.h5')['arr_0']
        which_states = [0, 1, 2]
        rng = np.random.RandomState(42)
        list_of_trajs = project.get_random_confs_from_states(
            assignments, which_states, num_confs=2, replacement=True,
            random=rng)

        assert isinstance(list_of_trajs, list)
        assert isinstance(list_of_trajs[0], Trajectory)
        eq(len(list_of_trajs), len(which_states))
        for t in list_of_trajs:
            eq(len(t), 2)

        print(list_of_trajs[0].keys())
        # sep, tps, one
        for style in ('sep', 'tps', 'one'):
            save(list_of_trajs, which_states, style=style, format='lh5',
                 outdir=self.td)

        # Per state: the two single-conformation files, then the combined one.
        names = []
        for state in which_states:
            names.extend(['State%d-0.lh5' % state,
                          'State%d-1.lh5' % state,
                          'State%d.lh5' % state])

        for name in names:
            written = Trajectory.load_trajectory_file(pjoin(self.td, name))
            eq(written, get('save_structures/' + name))
Ejemplo n.º 16
0
def main():
    """Simulate trajectories on the Muller potential and write a project.

    Command line options ``-n/--n_trajs`` and ``-t/--traj_length`` set the
    number and length of the trajectories. Each trajectory is written as an
    LHDF file below ``./Trajectories``; the project description goes to
    ``ProjectInfo.h5`` and a pickled ``EuclideanMetric`` to ``metric.pickl``.
    The script exits with status 1 if ``ProjectInfo.h5`` exists or the
    ``Trajectories`` directory cannot be created.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--n_trajs', help='number of trajectories. Default=10', type=int, default=10)
    parser.add_argument('-t', '--traj_length', help='trajectories length. Default=10000', type=int, default=10000)
    args = parser.parse_args()

    # these could be configured
    kT = 15.0
    dt = 0.1
    mGamma = 1000.0

    forcecalculator = muller.muller_force()

    project = Project({'ConfFilename': os.path.join(mullermsm.__path__[0], 'conf.pdb'),
              'NumTrajs': args.n_trajs,
              'ProjectRootDir': '.',
              'TrajFileBaseName': 'trj',
              'TrajFilePath': 'Trajectories',
              'TrajFileType': '.lh5',
              'TrajLengths': [args.traj_length]*args.n_trajs})

    if os.path.exists('ProjectInfo.h5'):
        sys.stderr.write("The file ./ProjectInfo.h5 already exists. I don't want to overwrite anything, so i'm backing off\n")
        sys.exit(1)

    try:
        os.mkdir('Trajectories')
    except OSError:
        sys.stderr.write("The directory ./Trajectories already exists. I don't want to overwrite anything, so i'm backing off\n")
        sys.exit(1)

    for i in range(args.n_trajs):
        print('simulating traj %s' % i)

        # select initial configs randomly from a 2D box
        initial_x = [random.uniform(-1.5, 1.2), random.uniform(-0.2, 2)]
        print('starting conformation from randomly sampled points (%s, %s)' % (initial_x[0], initial_x[1]))
        print('propagating for %s steps on the Muller potential with a Langevin integrator...' % args.traj_length)

        positions = muller.propagate(args.traj_length, initial_x, kT, dt, mGamma, forcecalculator)

        # positions is N x 2, but we want to make it N x 1 x 3 where the additional
        # column is just zeros. This way, being N x 1 x 3, it looks like a regular MD
        # trajectory that would be N_frames x N_atoms x 3
        n_frames = len(positions)
        positions3 = np.hstack((positions, np.zeros((n_frames, 1)))).reshape((n_frames, 1, 3))
        t = Trajectory.LoadTrajectoryFile(project['ConfFilename'])
        t['XYZList'] = positions3

        traj_filename = project.GetTrajFilename(i)
        t.SaveToLHDF(traj_filename)
        print('saving trajectory to %s' % traj_filename)

    project.SaveToHDF('ProjectInfo.h5')
    print('saved ProjectInfo.h5 file')

    # Close the pickle file deterministically instead of leaking the handle.
    with open('metric.pickl', 'wb') as metric_file:
        pickle.dump(metric.EuclideanMetric(), metric_file)
    print('saved metric.pickl')
Ejemplo n.º 17
0
    def save(self):
        """Write the trajectories to disk as an MSMBuilder project.

        Each entry of ``self.trajectories`` is saved as
        ``<project_dir>/Trajectories/trj<i>.lh5``; the project description is
        written to ``<project_dir>/Project.yaml``, reloaded and validated.
        """
        traj_dir = pjoin(self.project_dir, 'Trajectories')
        if not os.path.exists(traj_dir):
            os.makedirs(traj_dir)

        # The conformation serves as a template for every saved trajectory.
        template = Trajectory.load_trajectory_file(self.conf_filename)

        traj_paths = []
        for index, coordinates in enumerate(self.trajectories):
            template['IndexList'] = None # bug in msmbuilder
            template['XYZList'] = coordinates

            destination = pjoin(traj_dir, 'trj%d.lh5' % index)
            traj_paths.append(destination)
            template.save(destination)

        records = {
            'conf_filename': os.path.abspath(self.conf_filename),
            'traj_lengths': self.n_frames*np.ones(self.n_trajs),
            'traj_paths': [os.path.abspath(path) for path in traj_paths],
            'traj_converted_from': [[] for _ in range(self.n_trajs)],
            'traj_errors': [None] * self.n_trajs,
        }
        project_file = pjoin(self.project_dir, 'Project.yaml')
        Project(records, project_dir=self.project_dir, validate=True).save(project_file)

        # just check again
        reloaded = Project.load_from(project_file)
        reloaded._validate()
        deviation = reloaded.load_traj(0)['XYZList'] - self.trajectories[0]
        assert np.all(deviation**2 < 1e-6)
Ejemplo n.º 18
0
    def save(self):
        """Persist ``self.trajectories`` as an MSMBuilder project on disk.

        Trajectories go to ``<project_dir>/Trajectories/trj<i>.lh5`` and the
        project file to ``<project_dir>/Project.yaml``. The project is then
        loaded again, validated, and its first trajectory is compared with
        the in-memory coordinates.
        """
        traj_dir = pjoin(self.project_dir, 'Trajectories')
        if not os.path.exists(traj_dir):
            os.makedirs(traj_dir)

        # One loaded conformation is reused as the container for all saves.
        container = Trajectory.load_trajectory_file(self.conf_filename)

        traj_paths = []
        for number, xyz in enumerate(self.trajectories):
            container['IndexList'] = None  # bug in msmbuilder
            container['XYZList'] = xyz

            target = pjoin(traj_dir, 'trj%d.lh5' % number)
            traj_paths.append(target)
            container.save(target)

        description = {
            'conf_filename': os.path.abspath(self.conf_filename),
            'traj_lengths': self.n_frames * np.ones(self.n_trajs),
            'traj_paths': [os.path.abspath(p) for p in traj_paths],
            'traj_converted_from': [[] for _ in range(self.n_trajs)],
            'traj_errors': [None] * self.n_trajs,
        }
        yaml_path = pjoin(self.project_dir, 'Project.yaml')
        project = Project(description, project_dir=self.project_dir,
                          validate=True)
        project.save(yaml_path)

        # just check again
        project = Project.load_from(yaml_path)
        project._validate()
        squared_error = (
            project.load_traj(0)['XYZList'] - self.trajectories[0])**2
        assert np.all(squared_error < 1e-6)
Ejemplo n.º 19
0
def main():
    """Plot the Voronoi decomposition of the generators with one trajectory.

    Command line options: ``-g/--generators`` (generator trajectory file),
    ``-p/--project`` (project HDF file) and ``-s/--stride`` (subsampling of
    the plotted trajectory). Only the x and y coordinates of atom 0 are used.
    The figure is displayed with ``pp.show()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--generators', default='Data/Gens.lh5', help='Path to Gens.lh5')
    parser.add_argument('-p', '--project', default='ProjectInfo.h5', help='Path to ProjectInfo.h5')
    parser.add_argument('-s', '--stride', default=5, type=int, help='Stride to plot the data at')
    args = parser.parse_args()


    # x/y coordinates of atom 0 in every generator frame
    gens = Trajectory.LoadTrajectoryFile(args.generators)
    gens_x = gens['XYZList'][:,0,0]
    gens_y =  gens['XYZList'][:,0,1]
    points = np.array([gens_x, gens_y]).transpose()



    # NOTE: tri is not used further down in this function.
    tri = Delaunay(points)

    # Wrap every generator as a Voronoi site.
    PL = []
    for p in points:
        PL.append(Voronoi.Site(x=p[0],y=p[1]))

    v,eqn,edges,wtf = Voronoi.computeVoronoiDiagram(PL)

    # Keep only edges whose two vertex indices are both non-negative
    # (presumably negative indices mark unbounded edges -- TODO confirm).
    edge_points=[]
    for (l,x1,x2) in edges:
        if x1>=0 and x2>=0:
            edge_points.append((v[x1],v[x2]))

    lines = LineCollection(edge_points, linewidths=0.5, color='k')

    fig = pp.figure()
    ax = fig.add_subplot(111)

    fig.gca().add_collection(lines)

    # The plot limits are the bounding box of the generators.
    maxx, minx= np.max(gens_x), np.min(gens_x)
    maxy, miny = np.max(gens_y), np.min(gens_y)
    # plot the background
    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy, ax=ax)
    pp.xlim(minx, maxx)
    pp.ylim(miny, maxy)

    # plot a single trajectory
    p = Project.LoadFromHDF(args.project)
    t = p.LoadTraj(0)
    x = t['XYZList'][:,0,0][::args.stride]
    y = t['XYZList'][:,0,1][::args.stride]
    cm = pp.get_cmap('spectral')

    # Draw the trajectory segment by segment so that each segment picks up
    # the next colour of the cycle, taken from the colormap in order.
    n_points = len(x)
    ax.set_color_cycle([cm(1.*i/(n_points-1)) for i in range(n_points-1)])
    for i in range(n_points-1):
        ax.plot(x[i:i+2],y[i:i+2])

    pp.title('Voronoi Microstate Decomposition, with first trajectory')



    pp.show()
Ejemplo n.º 20
0
def test_asa_2():
    """Frame 0's total ASA must match the stored value either way it is computed."""
    traj_path = os.path.join(fixtures_dir(), 'trj0.lh5')
    t = Trajectory.load_trajectory_file(traj_path)

    # Once from a one-frame slice, once from the full-trajectory result.
    total_from_slice = np.sum(calculate_asa(t[0]))
    total_from_full = np.sum(calculate_asa(t)[0])
    expected_total = 2.859646797180176

    for total in (total_from_slice, total_from_full):
        npt.assert_approx_equal(expected_total, total)
Ejemplo n.º 21
0
def load_gens(gens_fn, conf_fn, metric):
    """Setup a worker by adding pgens to its global namespace

    This is necessary because pgens are not necessarily picklable, so we can't
    just prepare them on the master and then push them to the remote workers --
    instead we want to actually load the pgens from disk and prepare them on
    the remote node

    Parameters
    ----------
    gens_fn : str
        Path of the generators trajectory file to load and prepare.
    conf_fn : str
        Path of the conformation file, loaded into the global ``CONF``.
    metric : distance metric
        Stored in the global ``METRIC``; its ``prepare_trajectory`` method is
        applied to the loaded generators.

    Notes
    -----
    Sets the module globals ``METRIC``, ``CONF``, ``PGENS`` and ``PREPARED``
    (the last one to True once everything has been loaded). Nothing is
    returned.
    """
    # Imported here rather than relying on a module-level import.
    from msmbuilder import Trajectory
    
    global PGENS, CONF, METRIC, PREPARED
    
    METRIC = metric
    CONF = Trajectory.load_trajectory_file(conf_fn)
    gens = Trajectory.load_trajectory_file(gens_fn)
    PGENS = metric.prepare_trajectory(gens)
    # Set last, so it is only True after all loads above succeeded.
    PREPARED = True
Ejemplo n.º 22
0
    def test_c_Cluster(self):
        """Run Cluster.py (rmsd + kcenters) and compare the generators."""
        # We need to be sure to skip the stochastic k-mediods
        cmd = "Cluster.py -p {project} -s {stride} rmsd -a {atomindices} kcenters -d {rmsdcutoff}".format(project=ProjectFn, stride=Stride, atomindices="AtomIndices.dat", rmsdcutoff=RMSDCutoff)
        print(cmd)

        os.system(cmd)

        # Best-effort cleanup of the assignment files. Each file is removed
        # independently so a missing first file does not skip the second, and
        # only filesystem errors are ignored.
        for leftover in ('Assignments.h5', 'Assignments.h5.distances'):
            try:
                os.remove(os.path.join(WorkingDir, 'Data', leftover))
            except OSError:
                pass

        G   = Trajectory.load_trajectory_file(GensPath)
        r_G = Trajectory.load_trajectory_file(ReferenceDir +'/'+ GensPath)
        self.assert_trajectories_equal(G, r_G)
Ejemplo n.º 23
0
 def _eval_traj_shapes(self):
     """Inspect every trajectory file and return (lengths, n_atoms) arrays.

     Both arrays have one entry per trajectory, filled from the first two
     elements of the shape reported by the loader with JustInspect=True.
     """
     lengths = np.zeros(self.n_trajs)
     n_atoms = np.zeros(self.n_trajs)
     conf = self.load_conf()
     for index in xrange(self.n_trajs):
         inspected = Trajectory.load_trajectory_file(
             self.traj_filename(index), JustInspect=True, Conf=conf)
         lengths[index], n_atoms[index] = inspected[0], inspected[1]
     return lengths, n_atoms
Ejemplo n.º 24
0
def main(args, metric):
    """Assign the project's frames to the generators, with checkpointing.

    Results are written to ``Assignments.h5`` and
    ``Assignments.h5.distances`` inside ``args.output_dir``.
    """
    output_dir = args.output_dir
    assignments_path = os.path.join(output_dir, "Assignments.h5")
    distances_path = assignments_path + ".distances"

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens,
                           assignments_path, distances_path)

    logger.info('All Done!')
Ejemplo n.º 25
0
def LoadTrajectory(trajectory):
    """Return a Trajectory for a path string or an existing Trajectory.

    A ``str`` is loaded with ``Trajectory.LoadFromLHDF``; an IOError from the
    loader is re-raised as ``IOError("Can not find <path>")``. A Trajectory
    instance is returned unchanged. Any other input yields None.
    """
    if not isinstance(trajectory, str):
        if isinstance(trajectory, Trajectory):
            return trajectory
        return None

    try:
        return Trajectory.LoadFromLHDF(trajectory)
    except IOError:
        raise IOError("Can not find %s" % trajectory)
Ejemplo n.º 26
0
 def test_g_GetRandomConfs(self):
     """GetRandomConfs.run with a seeded RNG must reproduce the reference."""
     project = Project.load_from(ProjectFn)
     assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

     # make a predictable stream of random numbers by seeding the RNG with 42
     rng = np.random.RandomState(42)
     drawn = GetRandomConfs.run(project, assignments,
                                NumRandomConformations, rng)

     reference_path = os.path.join(ReferenceDir, "2RandomConfs.lh5")
     expected = Trajectory.load_trajectory_file(reference_path)
     self.assert_trajectories_equal(expected, drawn)
Ejemplo n.º 27
0
def main():
    """Scatter-plot the x/y positions of atom 0 for the chosen trajectories.

    ``-t/--trajectories`` accepts file paths and/or integer trajectory
    indices into the project given by ``-p/--project``; '-1' (the default)
    adds every trajectory of the project. Arguments that are neither an
    existing path nor an integer, or that fail to load, are reported on
    stderr and skipped. The background is rendered with ``plot_v`` over the
    bounding box of everything plotted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--project', default='ProjectInfo.h5')
    parser.add_argument(
        '-t', '--trajectories', nargs='+', default=['-1'],
        help='''Supply either the path to a trajectory file (i.e. Data/Gens.lh5),
         or an integer, which will be interepreted as a trajectory index
         into the trajectories that accompany the project. default: plot all
         of the trajectories''')
    args = parser.parse_args()

    p = Project.LoadFromHDF(args.project)

    # record the bounding box of the points so that we know
    # what to render for the background
    maxx, minx, maxy, miny = 1.2, -1.5, 2, -0.2

    # if -1 is included, add in ALL of the trajectories
    if '-1' in args.trajectories:
        args.trajectories.remove('-1')
        args.trajectories.extend(range(p['NumTrajs']))
    # remove duplicates
    args.trajectories = set(args.trajectories)

    for requested in args.trajectories:
        candidate_path = str(requested)
        if os.path.exists(candidate_path):
            # An existing file wins over interpreting the value as an index.
            traj = Trajectory.LoadTrajectoryFile(candidate_path)
            print('plotting %s' % requested)
            markersize = 50
        else:
            try:
                i = int(requested)
                traj = p.LoadTraj(i)
                print('plotting %s' % i)
                markersize = 5
            except ValueError:
                sys.stderr.write('I couldnt figure out how to deal with the argument %s' % requested + '\n')
                continue
            except IOError as e:
                sys.stderr.write(str(e) + '\n')
                continue

        xyz = traj['XYZList']
        x = xyz[:, 0, 0]
        y = xyz[:, 0, 1]

        # Grow the bounding box to include this trajectory.
        maxx = max(np.max(x), maxx)
        maxy = max(np.max(y), maxy)
        minx = min(np.min(x), minx)
        miny = min(np.min(y), miny)
        pp.plot(x, y, '.', markersize=markersize, alpha=0.5)

    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy)
    pp.show()
Ejemplo n.º 28
0
    def _load_traj(self, file_list):
        """
        Load a set of xtc or dcd files as a single trajectory

        Note that the ordering of `file_list` is relevant, as the trajectories
        are catted together.

        Parameters
        ----------
        file_list : list of str
            Paths of the files to load, in the order they are joined.

        Returns
        -------
        traj : msmbuilder.Trajectory

        Raises
        ------
        ValueError
            If `self.input_traj_ext` is neither '.xtc' nor '.dcd'.
        """

        if self.input_traj_ext == '.xtc':
            traj = Trajectory.load_from_xtc(file_list, PDBFilename=self.conf_filename,
                        discard_overlapping_frames=True)
        elif self.input_traj_ext == '.dcd':
            # DCD files need the DCD reader; the XTC loader was called here
            # before.
            traj = Trajectory.load_from_dcd(file_list, PDBFilename=self.conf_filename)
        else:
            raise ValueError('Unsupported trajectory extension: %r'
                             % (self.input_traj_ext,))
        return traj
Ejemplo n.º 29
0
def test2():
    """
    Demonstrate how to create new trj files with hc strings: every existing
    trj<i>.lh5 (i = 0..99) in the trajectory directory is loaded and passed
    to CreateTrajFileWithHCstrings.
    """
    path ="/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories"
    for index in range(100):
        candidate = "%s/trj%d.lh5" % (path, index)
        if not os.path.exists(candidate):
            continue
        CreateTrajFileWithHCstrings(Trajectory.LoadFromLHDF(candidate))
    print("Done.")
Ejemplo n.º 30
0
 def _eval_traj_shapes(self):
     """Return the frame count and atom count of every trajectory file.

     The files are only inspected (JustInspect=True); the first element of
     the reported shape is stored as the length, the second as the number
     of atoms. Two arrays with one entry per trajectory are returned.
     """
     lengths = np.zeros(self.n_trajs)
     n_atoms = np.zeros(self.n_trajs)
     conf = self.load_conf()
     for traj_number in xrange(self.n_trajs):
         filename = self.traj_filename(traj_number)
         shape = Trajectory.load_trajectory_file(filename,
                                                 JustInspect=True,
                                                 Conf=conf)
         lengths[traj_number], n_atoms[traj_number] = shape[0], shape[1]
     return lengths, n_atoms
Ejemplo n.º 31
0
def run(project, assignments, num_confs_per_state, random_source=None):
    """
    Pull random confs from each state in an MSM

    Frames are drawn independently for every state, so the same frame can be
    picked more than once.

    Parameters
    ----------
    project : msmbuilder.Project
        Used to load up the trajectories, get topology
    assignments : np.ndarray, dtype=int
        State membership for each frame
    num_confs_per_state : int
        number of conformations to pull from each state
    random_source : numpy.random.RandomState, optional
        If supplied, random numbers will be pulled from this random source,
        instead of the default, which is np.random. This argument is used
        for testing, to ensure that the random number generator always
        gives the same stream.

    Returns
    -------
    output
        The object returned by `project.load_conf()`, with its 'XYZList'
        entry replaced by the stacked coordinates of the sampled frames.

    Notes
    -----
    A new random_source can be initialized by calling numpy.random.RandomState(seed)
    with whatever seed you like. See http://stackoverflow.com/questions/5836335/consistenly-create-same-random-numpy-array
    for some discussion.

    """

    if random_source is None:
        random_source = np.random

    # states are numbered from 0, so the largest label fixes the state count
    n_states = np.max(assignments) + 1
    logger.info("Pulling %s confs for each of %s states", num_confs_per_state,
                n_states)

    inv = MSMLib.invert_assignments(assignments)
    xyzlist = []
    for s in xrange(n_states):
        trj, frame = inv[s]
        # trj and frame are a list of indices, such that
        # project.load_traj(trj[i])[frame[i]] is a frame assigned to state s
        for j in xrange(num_confs_per_state):
            r = random_source.randint(len(trj))
            xyz = Trajectory.read_frame(project.traj_filename(trj[r]),
                                        frame[r])
            xyzlist.append(xyz)

    # xyzlist is now a list of (n_atoms, 3) arrays, and we're going
    # to stack it along the third dimension
    # NOTE(review): np.dstack puts the conformation index on the LAST axis;
    # confirm that this is the layout consumers of 'XYZList' expect
    xyzlist = np.dstack(xyzlist)
    # load up the conf to get the topology, then pop in the new coordinates
    output = project.load_conf()
    output['XYZList'] = xyzlist

    return output
Ejemplo n.º 32
0
def main(modeldir, start, type):
    """
    Build a movie trajectory by sampling a path of states from a transition
    matrix and pulling random conformations for every visited state.

    Parameters
    ----------
    modeldir : str
        Model directory. 'tProb.mtx', 'Assignments.Fixed.h5' and the
        'tpt-<type>' subdirectory are read from / written below it; the
        project file and the reference pdb are looked up in the part of
        this path that precedes 'Data'.
    start : int or str
        Index into the unbound-states list selecting the start state
        (converted with int()).
    type : str
        Label used to build the 'tpt-<type>' directory and file names.
        NOTE(review): shadows the builtin `type` inside this function.
    """
    start=int(start)
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    # the first pdb matching the pattern serves as topology for xtc loading
    files=glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    pdb=files[0]
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T=mmread('%s/tProb.mtx' % modeldir)
    startstate=unbound[start]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    steps=100000
    print "on start state %s" % startstate
    # reuse a previously sampled state path if one was saved, so that a
    # restarted run continues along the same path
    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj=numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj=msm_analysis.sample(T, int(startstate),int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)
    print "checking for chkpt file"
    checkfile=glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        # resume: load the frames written so far and recover the state index
        # that is encoded in the checkpoint file name
        movie=Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n=int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        # NOTE(review): the loop below starts again AT index n, although the
        # checkpoint was written after index n had been appended -- confirm
        # whether the frames of that state end up in the movie twice
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile=checkfile[0]
        restart=True
    else:
        restart=False
        n=0
        movie=project.empty_traj()
    while n < len(traj):
        print "on state %s" % n
        # traj may have been read back by loadtxt, hence the int() conversion
        state=int(traj[n])
        t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 10)
        if n==0:
            # first step: the coordinates start the movie
            movie['XYZList']=t[0]['XYZList']
            n+=1
            continue
        elif n % 100==0:
            # every 100 steps: append, then write a checkpoint whose name
            # carries the current step index
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart==True:
                # only runs that were themselves restarted move the previous
                # checkpoint aside before writing the next one
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile='%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n+=1
            continue
        elif n!=0:
            # ordinary step: just append the new coordinates
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            n+=1
            continue
    # the complete movie
    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
Ejemplo n.º 33
0
    def test_g_GetRandomConfs(self):
        """Compare GetRandomConfs.run output with the stored reference confs."""
        project = Project.load_from(ProjectFn)
        assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

        # seeding the RNG with 42 gives a repeatable stream of random numbers,
        # so the drawn frames can be compared with a fixed reference
        rng = np.random.RandomState(42)
        sampled = GetRandomConfs.run(project, assignments,
                                     NumRandomConformations, rng)

        reference_path = os.path.join(ReferenceDir, "2RandomConfs.lh5")
        expected = Trajectory.load_trajectory_file(reference_path)
        self.assert_trajectories_equal(expected, sampled)
Ejemplo n.º 34
0
    def test_c_Cluster(self):
        """Run Cluster.py (rmsd metric, kcenters) and compare the generators
        it writes with the reference generators."""
        # We need to be sure to skip the stochastic k-medoids
        cmd = "Cluster.py -p {project} -s {stride} rmsd -a {atomindices} kcenters -d {rmsdcutoff}".format(
            project=ProjectFn,
            stride=Stride,
            atomindices="AtomIndices.dat",
            rmsdcutoff=RMSDCutoff)
        print(cmd)

        os.system(cmd)

        # Best-effort cleanup of the assignment files. Each file is removed
        # on its own, so a missing first file does not leave the second one
        # behind, and only filesystem errors are ignored.
        for leftover in ('Assignments.h5', 'Assignments.h5.distances'):
            try:
                os.remove(os.path.join(WorkingDir, 'Data', leftover))
            except OSError:
                pass

        G = Trajectory.load_trajectory_file(GensPath)
        r_G = Trajectory.load_trajectory_file(ReferenceDir + '/' + GensPath)
        self.assert_trajectories_equal(G, r_G)
Ejemplo n.º 35
0
def get_project_object( traj_directory, conf_filename, out_filename=None ):
    """
    This function constructs a msmbuilder.Project object
    given a directory of trajectories saved as .lh5's.

    Note that this is only really necessary when a script
    like ConvertDataToLHDF.py converts the data but fails
    to write out the ProjectInfo.yaml file.

    This function can also be used to combine two projects
    by copying and renaming the trajectories in a new
    folder. Though, it's probably more efficient to just
    do some bash stuff to cat the ProjectInfo.yaml's
    together and rename the trajectories.

    Parameters
    ----------
    traj_directory : str
        directory to find the trajectories; every entry of the directory
        is treated as a trajectory, in natural (keynat) order
    conf_filename : str
        file to find the conformation
    out_filename : str, optional
        if None, then this function does not save the project file, but
        if given, the function will save the project file and also
        return the object

    Returns
    -------
    project : msmbuilder.Project object corresponding to
        your project.
    """

    # paths relative to the current directory, in natural sort order
    traj_paths = [ os.path.join( traj_directory, filename )
                   for filename in sorted( os.listdir( traj_directory ), key=keynat ) ]

    traj_lengths = []

    for traj_filename in traj_paths: # Get the length of each trajectory
        logger.info( traj_filename )
        # With JustInspect=True this just returns the shape of the XYZList
        traj_lengths.append( Trajectory.load_from_lhdf( traj_filename, JustInspect=True )[0] )

    project = Project({'conf_filename': conf_filename,
                       'traj_lengths': traj_lengths,
                       'traj_paths': traj_paths,
                       'traj_errors': [None] * len(traj_paths),
                       # one fresh list per trajectory: `[[None]] * n` would
                       # make every entry the very same list object
                       'traj_converted_from': [ [None] for _ in traj_paths ] })

    if out_filename is not None:
        project.save( out_filename )
        logger.info('Saved project file to %s', out_filename)
    return project
def calculatedistance(AtomName1, ResidueID1, AtomName2, ResidueID2, trajfile,
                      LongestTrajLength):
    """
    Calculate the distance between two given atoms in a given trajectory.

    Each atom is selected by atom name together with residue id. The
    returned list holds one distance per frame and is padded with -1 up to
    `LongestTrajLength` entries.
    """
    traj = Trajectory.LoadFromLHDF(trajfile)
    names, residues = traj['AtomNames'], traj['ResidueID']
    first_mask = (names == AtomName1) * (residues == ResidueID1)
    second_mask = (names == AtomName2) * (residues == ResidueID2)

    coords = traj['XYZList']
    distance = []
    for frame in coords:
        # first matching atom pair of this frame; the list round-trip is kept
        # so the dot product is evaluated on plain Python floats
        delta = (frame[first_mask, :] - frame[second_mask, :])[0].tolist()
        distance.append(np.dot(delta, delta)**0.5)

    # pad short trajectories so all results share one length
    padding = [-1] * (LongestTrajLength - len(coords))
    distance.extend(padding)
    return distance
Ejemplo n.º 37
0
def FixGenFile(Mapping, GenFile, Outfile='./Gens.Fixed.lh5'):
    """
    Write a fixed (mapped) generator file from a Mapping.dat file.

    Generators whose mapping entry is negative are dropped; the remaining
    ones keep their original index as 'StateID'. The result is saved to
    `Outfile` (Gens.Fixed.lh5 by default).
    """
    original = Trajectory.LoadFromLHDF(GenFile)
    fixed = copy.deepcopy(original)

    # boolean selector of the generators that survive the mapping
    keep = loadtxt(Mapping) >= 0
    n_generators = len(original['XYZList'])

    fixed['StateID'] = np.arange(n_generators)[keep]
    fixed['XYZList'] = original['XYZList'][keep, :, :]

    print("Save to %s" % Outfile)
    fixed.SaveToLHDF(Outfile)
Ejemplo n.º 38
0
def test_gpurmsd():
    """GPU and CPU RMSD from frame 0 to all frames must agree to 4 decimals."""
    traj = Trajectory.load_trajectory_file(trj_path)

    # GPU implementation
    gpu_metric = GPURMSD()
    gpu_prepared = gpu_metric.prepare_trajectory(traj)
    gpu_metric._gpurmsd.print_params()
    gpu_distances = gpu_metric.one_to_all(gpu_prepared, gpu_prepared, 0)

    # CPU implementation on the same trajectory
    cpu_metric = RMSD()
    cpu_prepared = cpu_metric.prepare_trajectory(traj)
    cpu_distances = cpu_metric.one_to_all(cpu_prepared, cpu_prepared, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
Ejemplo n.º 39
0
def _dssp_for_file(pool, filename, chunk_size, label, n_frames):
    """Run analyze_conf on every frame of one lh5 file, chunk by chunk."""
    assignments = []
    done = 0
    for trj_chunk in Trajectory.enum_chunks_from_lhdf(filename, ChunkSize=chunk_size):
        # blocks until the whole chunk has been processed by the workers
        assignments.extend(pool.map(analyze_conf, trj_chunk['XYZList']))

        done += len(trj_chunk)
        print("Trajectory %s: %d / %d" % (label, done, n_frames))
    return assignments


def run( project, output, num_procs=1, chunk_size=50000, traj_fn='all' ):
    """
    Apply analyze_conf to every frame and save the results with np.save.

    Parameters
    ----------
    project : msmbuilder.Project
        Supplies file names and lengths when all trajectories are processed.
    output : str
        Target handed to np.save.
    num_procs : int, optional
        Size of the multiprocessing pool.
    chunk_size : int, optional
        Number of frames read from disk at a time.
    traj_fn : str, optional
        'all' (case-insensitive) to process every trajectory of the project,
        otherwise the single lh5 file to process.

    Notes
    -----
    The saved array has one entry per processed trajectory. The module-level
    DEVNULL handle is closed once the results are written.
    """

    pool = mp.Pool( num_procs )

    dssp_assignments = []

    try:
        if traj_fn.lower() == 'all':
            for i in xrange( project.n_trajs ):
                dssp_assignments.append(
                    _dssp_for_file( pool, project.traj_filename( i ), chunk_size,
                                    i, project.traj_lengths[i] ) )
        else:
            N = Trajectory.load_from_lhdf(traj_fn, JustInspect=True)[0]
            dssp_assignments.append(
                _dssp_for_file( pool, traj_fn, chunk_size, traj_fn, N ) )
    finally:
        # release the worker processes even if the analysis fails
        pool.close()
        pool.join()

    dssp_assignments = np.array( dssp_assignments )
    np.save( output, dssp_assignments )
    DEVNULL.close()
Ejemplo n.º 40
0
def test_gpurmsd():
    """Check that GPURMSD reproduces the CPU RMSD distances to frame 0."""
    loaded = Trajectory.load_trajectory_file(trj_path)

    # distances from frame 0 on the GPU
    on_gpu = GPURMSD()
    prepared_for_gpu = on_gpu.prepare_trajectory(loaded)
    on_gpu._gpurmsd.print_params()
    gpu_result = on_gpu.one_to_all(prepared_for_gpu, prepared_for_gpu, 0)

    # the same distances on the CPU
    on_cpu = RMSD()
    prepared_for_cpu = on_cpu.prepare_trajectory(loaded)
    cpu_result = on_cpu.one_to_all(prepared_for_cpu, prepared_for_cpu, 0)

    npt.assert_array_almost_equal(cpu_result, gpu_result, decimal=4)
Ejemplo n.º 41
0
def run(project, assignments, num_confs_per_state, random_source=None):
    """
    Pull random confs from each state in an MSM

    Frames are drawn independently for every state, so the same frame can be
    picked more than once.

    Parameters
    ----------
    project : msmbuilder.Project
        Used to load up the trajectories, get topology
    assignments : np.ndarray, dtype=int
        State membership for each frame
    num_confs_per_state : int
        number of conformations to pull from each state
    random_source : numpy.random.RandomState, optional
        If supplied, random numbers will be pulled from this random source,
        instead of the default, which is np.random. This argument is used
        for testing, to ensure that the random number generator always
        gives the same stream.

    Returns
    -------
    output
        The object returned by `project.load_conf()`, with its 'XYZList'
        entry replaced by the stacked coordinates of the sampled frames.

    Notes
    -----
    A new random_source can be initialized by calling numpy.random.RandomState(seed)
    with whatever seed you like. See http://stackoverflow.com/questions/5836335/consistenly-create-same-random-numpy-array
    for some discussion.

    """

    if random_source is None:
        random_source = np.random

    # states are numbered from 0, so the largest label fixes the state count
    n_states = np.max(assignments) + 1
    logger.info("Pulling %s confs for each of %s states", num_confs_per_state, n_states)

    inv = MSMLib.invert_assignments(assignments)
    xyzlist = []
    for s in xrange(n_states):
        trj, frame = inv[s]
        # trj and frame are a list of indices, such that
        # project.load_traj(trj[i])[frame[i]] is a frame assigned to state s
        for j in xrange(num_confs_per_state):
            r = random_source.randint(len(trj))
            xyz = Trajectory.read_frame(project.traj_filename(trj[r]), frame[r])
            xyzlist.append(xyz)

    # xyzlist is now a list of (n_atoms, 3) arrays, and we're going
    # to stack it along the third dimension
    # NOTE(review): np.dstack puts the conformation index on the LAST axis;
    # confirm that this is the layout consumers of 'XYZList' expect
    xyzlist = np.dstack(xyzlist)
    # load up the conf to get the topology, then pop in the new coordinates
    output = project.load_conf()
    output['XYZList'] = xyzlist

    return output
Ejemplo n.º 42
0
 def setUp(self):
     """Load the cfep reference data and set the fixture defaults."""
     data_dir = os.path.join(reference_dir(), 'cfep_reference/')
     gens_path = data_dir + 'Gens.lh5'
     counts_path = data_dir + 'tCounts.mtx'

     self.generators = Trajectory.load_trajectory_file(gens_path)
     n_generators = len(self.generators)

     self.counts = io.mmread(counts_path)
     self.lag_time = 1.0
     # one random value per generator
     self.pfolds = np.random.rand(n_generators)
     self.rescale = False

     self.reactant = 0
     self.product = n_generators
Ejemplo n.º 43
0
def Reference_Rg(trajfile):
    """
    Compute the Rg from single trajfile.

    For every frame the coordinates are centred on their mean position; Rg
    is the square root of the mean squared distance of the atoms from that
    centre, with all atoms weighted equally.

    Parameters
    ----------
    trajfile : str
        lh5 trajectory file, loaded with Trajectory.LoadFromLHDF.

    Returns
    -------
    Rg : list
        One value per frame of the trajectory.
    """

    t = Trajectory.LoadFromLHDF(trajfile)
    Rg = []
    for XYZ in t['XYZList']:
        # work in double precision so this stays usable as a reference value
        XYZ = np.asarray(XYZ, dtype=np.float64)
        n_atoms = len(XYZ)
        centered = XYZ - XYZ.mean(0)
        # the original referenced undefined names here instead of the
        # centred coordinates and the atom count
        Rg.append(((centered**2).sum() / n_atoms)**(0.5))
    return Rg
Ejemplo n.º 44
0
def main(args, metric):
    """
    Assign the project's trajectories to the supplied generators.

    Parameters
    ----------
    args : namespace
        Needs `output_dir`, `project` and `generators`. The output directory
        is created (including missing parents) if it does not exist.
    metric : distance metric
        Passed on to assign_with_checkpoint. For an RMSD metric its
        `atomindices` attribute is set to None as a side effect.

    Raises
    ------
    ValueError
        For an RMSD metric, when the number of atoms in the generators
        differs from the number of atom indices of the metric.
    """
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    #arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        # makedirs (rather than mkdir) so that a nested output directory
        # with missing parents works as well
        os.makedirs(args.output_dir)

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    if isinstance(metric, metrics.RMSD):
        # this is really bad design, and we're going to fix it soon in
        # MSMBuilder3, but here's the deal. When Cluster.py loads up the
        # trajectories (Cluster.py:load_trajectories()), it only loads the
        # required indices for RMSD. This means that when it saves the Gens
        # file, that file contains only a subset of the atoms. So when
        # we run *this* script, we need to perform a restricted load of the
        # the trajectories on disk, but we need to NOT perform a restricted
        # load of the gens.lh5 file. (By restricted load, I mean loading
        # only a subset of the data in the file)
        if gens['XYZList'].shape[1] != len(metric.atomindices):
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containg %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                    % (gens['XYZList'].shape[0], gens['XYZList'].shape[1],
                       len(metric.atomindices))
            raise ValueError(msg)


        # now that we're telling the assign function only to load up a
        # subset of the atoms, and the generator is already only a subset,
        # the actual RMSD object needs to, from ITS perspective, operate on
        # every degree of freedom. So it shouldn't be aware of any special
        # atom_indices
        atom_indices = metric.atomindices
        metric.atomindices = None
        # this runs assignment and prints them to disk
        assign_with_checkpoint(metric, project, gens, assignments_path,
            distances_path, atom_indices_to_load=atom_indices)
    else:
        assign_with_checkpoint(metric, project, gens, assignments_path,
            distances_path)

    logger.info('All Done!')
Ejemplo n.º 45
0
def test_traj_0():
    """
    Loading trj0.lh5 with a stride and an atom subset must give the same
    data as loading everything and restricting / striding afterwards.
    """
    aind = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)

    lh5_path = os.path.join(fixtures_dir(), 'trj0.lh5')

    # reference: full load, then restrict atoms and stride by hand
    r_traj = Trajectory.load_from_lhdf(lh5_path, Stride=1)
    r_traj.restrict_atom_indices(aind)
    r_traj['XYZList'] = r_traj['XYZList'][::stride]

    # candidate: let the loader do both
    traj = Trajectory.load_from_lhdf(lh5_path, Stride=stride, AtomIndices=aind)

    for key in traj.keys():
        if key == 'SerializerFilename':
            continue

        loaded, expected = traj[key], r_traj[key]
        if key == 'IndexList':
            # compared row by row
            for row, r_row in zip(loaded, expected):
                npt.assert_array_equal(row, r_row)
        elif key == 'XYZList':
            npt.assert_array_almost_equal(loaded, expected)
        else:
            npt.assert_array_equal(loaded, expected)
Ejemplo n.º 46
0
    def setUp(self):
        """Prepare generators, counts and default parameters for the tests."""
        reference = os.path.join(reference_dir(), 'cfep_reference/')

        gens = Trajectory.load_trajectory_file(reference + 'Gens.lh5')
        self.generators = gens
        n_gens = len(gens)

        self.counts = io.mmread(reference + 'tCounts.mtx')
        self.lag_time = 1.0
        # one random value per generator
        self.pfolds = np.random.rand(n_gens)
        self.rescale = False

        self.reactant, self.product = 0, n_gens
Ejemplo n.º 47
0
    def _load_traj(self, file_list):
        """
        Load several xtc or dcd files and join them into one trajectory.

        The files are concatenated in the order given by `file_list`.

        Returns
        -------
        traj : msmbuilder.Trajectory
        n_loaded : int
            Number of files loaded, i.e. `len(file_list)`.

        Raises
        ------
        ValueError
            If `self.input_traj_ext` is neither '.xtc' nor '.dcd'.
        """
        extension = self.input_traj_ext
        if extension not in ('.xtc', '.dcd'):
            raise ValueError()

        if extension == '.xtc':
            loader = Trajectory.load_from_xtc
        else:
            loader = Trajectory.load_from_dcd
        loaded = loader(file_list, Conf=self.conf,
                        discard_overlapping_frames=True)

        # loading is all or nothing (the loader raises on failure), so the
        # number of files loaded is simply the number of files requested
        return loaded, len(file_list)
Ejemplo n.º 48
0
def _generate_equilibration_job():
    """Generate a single equilibration job from the first forcefield

    Takes no parameters: everything is read from the database and from the
    Project settings.

    Returns
    -------
    traj : models.Trajectory
        An unsaved trajectory. The conformation to start from is attached
        to the object as traj.init_pdb.
    """
    logger.info('Constructing initial equilibration job')
    start_conf = msmbuilder.Trajectory.load_from_pdb(Project().pdb_topology_file)

    if Project().starting_confs_lh5 is not None:
        # start from a randomly chosen frame of the starting-confs file
        n_available = msmbuilder.Trajectory.load_from_lhdf(Project().starting_confs_lh5, JustInspect=True)[0]
        r = np.random.randint(n_available)
        frame_xyz = msmbuilder.Trajectory.read_lhdf_frame(Project().starting_confs_lh5, r)
        start_conf['XYZList'] = np.array([frame_xyz])
        logger.info('Using frame %s of starting_confs_lh5 (%s) to start equilibration run' % (r, Project().starting_confs_lh5))
        job_name = 'equilibration, starting from frame %s of starting_confs_lh5 (%s)' %  (r, Project().starting_confs_lh5)
    else:
        # no starting confs configured: the topology pdb itself is the
        # starting structure
        logger.info('Using pdb topolgy to start equlibration run')
        job_name = 'equilibration, starting from pdb toplogy'

    first_forcefield = Session.query(Forcefield).first()

    job = Trajectory(forcefield=first_forcefield,
                     name=job_name, mode='Equilibration')
    job.init_pdb = start_conf
    return job
Ejemplo n.º 49
0
def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices):
    """
    Compute LPRMSD distances from frame 0 of `pdb` to every frame of a
    trajectory file.

    The shapes of both prepared coordinate arrays and the distances are
    printed; the distances are also returned.
    """
    #project = Project.load_from_hdf(options.projectfn)
    loaded = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf)

    # you could replace this with your own metric if you like
    lprmsd = LPRMSD(atom_indices, permute_indices, alt_indices)
    prepared_pdb = lprmsd.prepare_trajectory(pdb)
    prepared_traj = lprmsd.prepare_trajectory(loaded)

    for prepared in (prepared_pdb, prepared_traj):
        print(prepared['XYZList'].shape)

    # the second return value is not needed here
    distances, _ = lprmsd.one_to_all_aligned(prepared_pdb, prepared_traj, 0)
    print(distances)
    return distances
Ejemplo n.º 50
0
def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices):
    """
    Compute LPRMSD distances from frame 0 of `pdb` to every frame of a
    trajectory file.

    The shapes of both prepared coordinate arrays and the distances are
    printed; the distances are also returned.
    """
    #project = Project.load_from_hdf(options.projectfn)
    loaded = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf)

    # you could replace this with your own metric if you like
    lprmsd = LPRMSD(atom_indices, permute_indices, alt_indices)
    prepared_pdb = lprmsd.prepare_trajectory(pdb)
    prepared_traj = lprmsd.prepare_trajectory(loaded)

    for prepared in (prepared_pdb, prepared_traj):
        print(prepared['XYZList'].shape)

    # the second return value is not needed here
    distances, _ = lprmsd.one_to_all_aligned(prepared_pdb, prepared_traj, 0)
    print(distances)
    return distances
Ejemplo n.º 51
0
def test():
    """
    Smoke-test the cfep VariableCoordinate code on a local data set.

    Reads generators, counts and committors from a hard-coded directory,
    builds a VariableCoordinate from all-ones weights, prints its zh and zc
    attributes, optimizes it and plots the result.
    """
    from msmbuilder import Trajectory
    from scipy import io

    print("Testing cfep code....")

    data_dir = '/Users/TJ/Programs/msmbuilder.sandbox/tjlane/cfep/'

    gens = Trajectory.load_trajectory_file(data_dir + 'Gens.lh5')
    tcounts = io.mmread(data_dir + 'tCounts.mtx')
    reactant_state = 0    # generator w/max RMSD
    product_state = 10598 # generator w/min RMSD
    # only the disabled CutCoordinate checks below make use of these
    committors = np.loadtxt(data_dir + 'FCommittors.dat')

    # test the usual coordinate
    #pfold_cfep = CutCoordinate(counts, generators, reactant, product)
    #pfold_cfep.set_coordinate_values(pfolds)
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_eigvector2()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_committors()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    # test the Variable Coordinate
    weights = np.ones((1225, 26104))
    variable_cfep = VariableCoordinate(contact_reaction_coordinate, weights,
                                       tcounts, gens, reactant_state,
                                       product_state)

    variable_cfep.evaluate_partition_functions()
    print(variable_cfep.zh)
    print(variable_cfep.zc)

    variable_cfep.optimize()
    print("Finished optimization")

    variable_cfep.plot()
Ejemplo n.º 52
0
def test():
    """
    Smoke-test the cfep VariableCoordinate code on a local data set.

    Reads generators, counts and committors from a hard-coded directory,
    builds a VariableCoordinate from all-ones weights, prints its zh and zc
    attributes, optimizes it and plots the result.
    """
    from msmbuilder import Trajectory
    from scipy import io

    print("Testing cfep code....")

    data_dir = '/Users/TJ/Programs/msmbuilder.sandbox/tjlane/cfep/'

    gens = Trajectory.load_trajectory_file(data_dir + 'Gens.lh5')
    tcounts = io.mmread(data_dir + 'tCounts.mtx')
    reactant_state = 0    # generator w/max RMSD
    product_state = 10598 # generator w/min RMSD
    # only the disabled CutCoordinate checks below make use of these
    committors = np.loadtxt(data_dir + 'FCommittors.dat')

    # test the usual coordinate
    #pfold_cfep = CutCoordinate(counts, generators, reactant, product)
    #pfold_cfep.set_coordinate_values(pfolds)
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_eigvector2()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_committors()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    # test the Variable Coordinate
    weights = np.ones((1225, 26104))
    variable_cfep = VariableCoordinate(contact_reaction_coordinate, weights,
                                       tcounts, gens, reactant_state,
                                       product_state)

    variable_cfep.evaluate_partition_functions()
    print(variable_cfep.zh)
    print(variable_cfep.zc)

    variable_cfep.optimize()
    print("Finished optimization")

    variable_cfep.plot()
Ejemplo n.º 53
0
def main():
    """
    Command-line entry point: parse the arguments, make sure the output
    directory exists and assign the project data to the generators.
    """
    # the texts are spelled out line by line so that the embedded newlines
    # and trailing blanks are visible
    description = (
        "\n"
        "Assign data that were not originally used in the clustering (because of\n"
        "striding) to the microstates. This is applicable to all medoid-based clustering\n"
        "algorithms, which includes all those implemented by Cluster.py except the\n"
        "hierarchical methods. (For assigning to a hierarchical clustering, use \n"
        "AssignHierarchical.py)\n"
        "\n"
        "Outputs:\n"
        "-Assignments.h5\n"
        "-Assignments.h5.distances\n"
        "\n"
        "Assignments.h5 contains the assignment of each frame of each trajectory to a \n"
        "microstate in a rectangular array of ints. Assignments.h5.distances is an \n"
        "array of real numbers of the same dimension containing the distance (according \n"
        "to whichever metric you choose) from each frame to to the medoid of the \n"
        "microstate it is assigned to."
    )
    generators_help = (
        "Output trajectory file containing\n"
        "        the structures of each of the cluster centers. Note that for hierarchical clustering\n"
        "        methods, this file will not be produced."
    )

    # a raw-description formatter_class is deliberately not passed here
    parser = arglib.ArgumentParser(description=description, get_metric=True)
    parser.add_argument('project')
    parser.add_argument(dest='generators', help=generators_help,
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')

    args, metric = parser.parse_args()

    out_dir = args.output_dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    assignments_path = os.path.join(out_dir, "Assignments.h5")
    distances_path = os.path.join(out_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path)

    logger.info('All Done!')
def run(project, pdb, metric, traj_fn=None):
    """
    Compute the distance from frame 0 of `pdb` to trajectory frames.

    Parameters
    ----------
    project : msmbuilder.Project
        Only used when `traj_fn` is None: supplies `n_trajs`,
        `traj_lengths` and `load_traj(i)`.
    pdb : trajectory
        Reference structure; prepared with `metric.prepare_trajectory`.
    metric : distance metric
        Must provide `prepare_trajectory` and `one_to_all`.
    traj_fn : str, optional
        A single trajectory file to process. When None, every trajectory
        of the project is processed.

    Returns
    -------
    distances
        With `traj_fn` None, an array with one row per trajectory and as
        many columns as the longest trajectory; unused entries stay at -1.
        Otherwise whatever `metric.one_to_all` returns for the one file.
    """

    ppdb = metric.prepare_trajectory(pdb)

    if traj_fn is None:
        # -1 marks positions beyond the end of shorter trajectories
        distances = -1 * np.ones((project.n_trajs, np.max(project.traj_lengths)))

        for i in range(project.n_trajs):
            logger.info("Working on Trajectory %d", i)
            ptraj = metric.prepare_trajectory(project.load_traj(i))
            d = metric.one_to_all(ppdb, ptraj, 0)
            distances[i, 0 : len(d)] = d
    else:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)

        distances = metric.one_to_all(ppdb, ptraj, 0)

    return distances
Ejemplo n.º 55
0
def run(project, pdb, metric, traj_fn=None):
    """
    Compute the distance from frame 0 of `pdb` to trajectory frames.

    Parameters
    ----------
    project : msmbuilder.Project
        Only used when `traj_fn` is None: supplies `n_trajs`,
        `traj_lengths` and `load_traj(i)`.
    pdb : trajectory
        Reference structure; prepared with `metric.prepare_trajectory`.
    metric : distance metric
        Must provide `prepare_trajectory` and `one_to_all`.
    traj_fn : str, optional
        A single trajectory file to process. When None, every trajectory
        of the project is processed.

    Returns
    -------
    distances
        With `traj_fn` None, an array with one row per trajectory and as
        many columns as the longest trajectory; unused entries stay at -1.
        Otherwise whatever `metric.one_to_all` returns for the one file.
    """

    ppdb = metric.prepare_trajectory(pdb)

    if traj_fn is None:
        # -1 marks positions beyond the end of shorter trajectories
        distances = -1 * np.ones(
            (project.n_trajs, np.max(project.traj_lengths)))

        for i in range(project.n_trajs):
            logger.info("Working on Trajectory %d", i)
            ptraj = metric.prepare_trajectory(project.load_traj(i))
            d = metric.one_to_all(ppdb, ptraj, 0)
            distances[i, 0:len(d)] = d
    else:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)

        distances = metric.one_to_all(ppdb, ptraj, 0)

    return distances