コード例 #1
0
def plot_gpu_cmd_correlation():
    """Plot CPU and GPU RMSD-to-frame-0 curves for one trajectory.

    The ``ww_1`` trajectory is loaded twice (one copy per metric). For each
    metric the distance from frame 0 to every frame is computed, and three
    panels are drawn on the top row of a 2x3 subplot grid: the CPU curve, a
    GPU-vs-CPU scatter plot and the GPU curve. The figure is displayed with
    ``pp.show()``; nothing is returned or written to disk.
    """
    # One freshly loaded copy per metric.
    # NOTE(review): presumably because prepare_trajectory may modify its
    # input -- confirm before sharing a single object.
    traj1 = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)
    traj1_copy = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)

    def gpudist(t):
        """Distances from frame 0 to all frames of `t` using GPURMSD."""
        gpurmsd = GPURMSD()
        pt = gpurmsd.prepare_trajectory(t)
        gpurmsd._gpurmsd.print_params()
        return gpurmsd.one_to_all(pt, pt, 0)

    def cpudist(t):
        """Distances from frame 0 to all frames of `t` using RMSD."""
        rmsd = RMSD()
        pt = rmsd.prepare_trajectory(t)
        return rmsd.one_to_all(pt, pt, 0)

    g1 = gpudist(traj1)
    c1 = cpudist(traj1_copy)

    pp.subplot(231)
    pp.plot(c1)
    pp.title('cpu rmsd drift along traj')
    pp.xlabel('frame index')
    # This was a second xlabel() call, which silently replaced 'frame index'.
    pp.ylabel('cpurmsd($X_{0}$, $X_{frame_index}$)')

    pp.subplot(232)
    pp.scatter(g1, c1)
    pp.xlabel('gpu rmsd')
    pp.ylabel('cpu rmsd')

    pp.subplot(233)
    pp.plot(g1)
    pp.title('gpu rmsd drift along traj')
    pp.xlabel('frame index')
    pp.ylabel('gpurmsd($X_{0}$, $X_{frame_index}$)')

    # The bottom row (subplots 234-236) is reserved for the same three panels
    # computed on a second, pre-aligned trajectory (ww_2 -> g2 / c2). That
    # comparison is currently disabled, as are
    # pp.subplots_adjust(hspace=0.4) and pp.savefig('gpucpu_correlation.png').
    pp.show()
コード例 #2
0
ファイル: test_gpurmsd.py プロジェクト: AgnesHH/msmbuilder
def plot_gpu_cmd_correlation():
    """Show how GPU and CPU RMSD agree along a single trajectory.

    Loads ``ww_1`` once per metric, measures the distance from frame 0 to
    every frame with both ``GPURMSD`` and ``RMSD``, and draws the CPU curve,
    a GPU-vs-CPU scatter plot and the GPU curve in the top row of a 2x3
    subplot grid. The result is displayed with ``pp.show()`` only.
    """
    # Separate copies for the two metrics.
    # NOTE(review): presumably needed because prepare_trajectory can alter
    # the trajectory it is given -- confirm.
    traj1 = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)
    traj1_copy = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)

    def gpudist(t):
        """Frame-0-to-all distances of `t` computed with GPURMSD."""
        gpurmsd = GPURMSD()
        pt = gpurmsd.prepare_trajectory(t)
        gpurmsd._gpurmsd.print_params()
        return gpurmsd.one_to_all(pt, pt, 0)

    def cpudist(t):
        """Frame-0-to-all distances of `t` computed with RMSD."""
        rmsd = RMSD()
        pt = rmsd.prepare_trajectory(t)
        return rmsd.one_to_all(pt, pt, 0)

    g1 = gpudist(traj1)
    c1 = cpudist(traj1_copy)

    pp.subplot(231)
    pp.plot(c1)
    pp.title('cpu rmsd drift along traj')
    pp.xlabel('frame index')
    # Fixed: a repeated xlabel() here used to overwrite 'frame index'.
    pp.ylabel('cpurmsd($X_{0}$, $X_{frame_index}$)')

    pp.subplot(232)
    pp.scatter(g1, c1)
    pp.xlabel('gpu rmsd')
    pp.ylabel('cpu rmsd')

    pp.subplot(233)
    pp.plot(g1)
    pp.title('gpu rmsd drift along traj')
    pp.xlabel('frame index')
    pp.ylabel('gpurmsd($X_{0}$, $X_{frame_index}$)')

    # Subplots 234-236 are left free for the same three panels on a second,
    # pre-aligned trajectory (ww_2 -> g2 / c2); that part is disabled, along
    # with pp.subplots_adjust(hspace=0.4) and
    # pp.savefig('gpucpu_correlation.png').
    pp.show()
コード例 #3
0
    def __init__(self, structure_or_filename, metric, max_distance):
        """Create an explosion validator

        Checks the distance from every frame to a structure and
        watches for things that are too far away

        Parameters
        ----------
        structure_or_filename : {msmbuilder.Trajectory, str}
            The structure to measure distances to, either as a trajectory (the first
            frame is the only one that counts) or a path to a trajectory
            on disk that can be loaded
        metric : msmbuilder distance metric
            Metric by which you want to measure distance
        max_distance : float
            The threshold distance, above which a ValidationError
            will be thrown

        Raises
        ------
        TypeError
            If `structure_or_filename` is neither a Trajectory nor a string.
        """

        if isinstance(structure_or_filename, Trajectory):
            conf = structure_or_filename
        elif isinstance(structure_or_filename, basestring):
            conf = Trajectory.load_trajectory_file(structure_or_filename)
        else:
            # Without this branch `conf` would be unbound below and the caller
            # would see an unhelpful UnboundLocalError.
            raise TypeError(
                'structure_or_filename must be a Trajectory or a path to a '
                'trajectory file, got %s' % type(structure_or_filename).__name__)

        self.max_distance = max_distance
        self.metric = metric
        # Prepare the reference once so it can be reused for every check.
        self._pconf = self.metric.prepare_trajectory(conf)
コード例 #4
0
ファイル: validators.py プロジェクト: chrismichel/msmbuilder
    def __init__(self, structure_or_filename, metric, max_distance):
        """Create an explosion validator

        Checks the distance from every frame to a structure and
        watches for things that are too far away

        Parameters
        ----------
        structure_or_filename : {msmbuilder.Trajectory, str}
            The structure to measure distances to, either as a trajectory (the first
            frame is the only one that counts) or a path to a trajectory
            on disk that can be loaded
        metric : msmbuilder distance metric
            Metric by which you want to measure distance
        max_distance : float
            The threshold distance, above which a ValidationError
            will be thrown

        Raises
        ------
        TypeError
            If `structure_or_filename` is neither a Trajectory nor a string.
        """

        if isinstance(structure_or_filename, Trajectory):
            conf = structure_or_filename
        elif isinstance(structure_or_filename, basestring):
            conf = Trajectory.load_trajectory_file(structure_or_filename)
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `conf` further down.
            raise TypeError(
                'structure_or_filename must be a Trajectory or a path to a '
                'trajectory file, got %s' % type(structure_or_filename).__name__)

        self.max_distance = max_distance
        self.metric = metric
        # The reference structure is prepared a single time, up front.
        self._pconf = self.metric.prepare_trajectory(conf)
コード例 #5
0
    def save(self):
        """Save the trajectories as an MSMBuilder project.

        Writes every entry of ``self.trajectories`` to
        ``<project_dir>/Trajectories/trj<i>.lh5``, writes the matching
        ``Project.yaml`` and then reloads it as a sanity check.
        """
        out_dir = pjoin(self.project_dir, 'Trajectories')
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # The conformation file is the template; only its coordinates are
        # swapped out for each trajectory that is written.
        template = Trajectory.load_trajectory_file(self.conf_filename)

        traj_paths = []
        for index, coords in enumerate(self.trajectories):
            template['IndexList'] = None  # bug in msmbuilder
            template['XYZList'] = coords

            destination = pjoin(traj_dir_entry(out_dir), 'trj%d.lh5' % index) \
                if False else pjoin(out_dir, 'trj%d.lh5' % index)
            traj_paths.append(destination)
            template.save(destination)

        records = {
            'conf_filename': os.path.abspath(self.conf_filename),
            'traj_lengths': self.n_frames * np.ones(self.n_trajs),
            'traj_paths': [os.path.abspath(path) for path in traj_paths],
            'traj_converted_from': [[] for _ in range(self.n_trajs)],
            'traj_errors': [None for _ in range(self.n_trajs)],
        }
        project = Project(records, project_dir=self.project_dir, validate=True)
        project.save(pjoin(self.project_dir, 'Project.yaml'))

        # Round trip: reload what was just written and compare the first
        # trajectory against the in-memory data.
        reloaded = Project.load_from(pjoin(self.project_dir, 'Project.yaml'))
        reloaded._validate()
        deviation = reloaded.load_traj(0)['XYZList'] - self.trajectories[0]
        assert np.all(deviation**2 < 1e-6)
コード例 #6
0
ファイル: create_project.py プロジェクト: rmcgibbo/diffusion
    def save(self):
        """Save the trajectories as an MSMBuilder project.

        Each array in ``self.trajectories`` becomes
        ``<project_dir>/Trajectories/trj<i>.lh5``. A ``Project.yaml`` that
        describes them is written next to that directory and then loaded
        again to check the result.
        """
        trajectories_dir = pjoin(self.project_dir, 'Trajectories')
        if not os.path.exists(trajectories_dir):
            os.makedirs(trajectories_dir)

        # Reuse one loaded conformation and replace its coordinates per file.
        carrier = Trajectory.load_trajectory_file(self.conf_filename)

        written = []
        for number, coordinates in enumerate(self.trajectories):
            carrier['IndexList'] = None  # bug in msmbuilder
            carrier['XYZList'] = coordinates

            target = pjoin(trajectories_dir, 'trj%d.lh5' % number)
            written.append(target)
            carrier.save(target)

        description = {
            'conf_filename': os.path.abspath(self.conf_filename),
            'traj_lengths': self.n_frames * np.ones(self.n_trajs),
            'traj_paths': [os.path.abspath(entry) for entry in written],
            'traj_converted_from': [[] for _ in range(self.n_trajs)],
            'traj_errors': [None for _ in range(self.n_trajs)],
        }
        new_project = Project(description, project_dir=self.project_dir,
                              validate=True)
        new_project.save(pjoin(self.project_dir, 'Project.yaml'))

        # just check again: what is on disk must match what is in memory
        on_disk = Project.load_from(pjoin(self.project_dir, 'Project.yaml'))
        on_disk._validate()
        difference = on_disk.load_traj(0)['XYZList'] - self.trajectories[0]
        assert np.all(difference**2 < 1e-6)
コード例 #7
0
ファイル: test_wrappers.py プロジェクト: dvanatta/msmbuilder
    def test(self):
        """Save randomly drawn conformations in every style and compare them
        with the stored reference files."""
        from msmbuilder.scripts.SaveStructures import save

        project = get('ProjectInfo.yaml')
        assignments = get('Assignments.h5')['arr_0']
        which_states = [0, 1, 2]
        # Fixed seed so the drawn conformations match the reference files.
        list_of_trajs = project.get_random_confs_from_states(
            assignments,
            which_states,
            num_confs=2,
            replacement=True,
            random=np.random.RandomState(42))

        assert isinstance(list_of_trajs, list)
        assert isinstance(list_of_trajs[0], Trajectory)
        eq(len(list_of_trajs), len(which_states))
        for drawn in list_of_trajs:
            eq(len(drawn), 2)

        print(list_of_trajs[0].keys())
        # sep, tps, one
        for style in ('sep', 'tps', 'one'):
            save(list_of_trajs, which_states, style=style, format='lh5',
                 outdir=self.td)

        # Per state: 'State<k>-0.lh5', 'State<k>-1.lh5', then 'State<k>.lh5'.
        names = ['State%d%s.lh5' % (state, suffix)
                 for state in which_states
                 for suffix in ('-0', '-1', '')]

        for name in names:
            written = Trajectory.load_trajectory_file(pjoin(self.td, name))
            eq(written, get('save_structures/' + name))
コード例 #8
0
ファイル: test_asa.py プロジェクト: jimsnyderjr/msmbuilder
def test_asa_2():
    """The summed ASA of frame 0 must be the same whether that frame is
    passed alone or as part of the whole trajectory."""
    traj_path = os.path.join(fixtures_dir(), 'trj0.lh5')
    t = Trajectory.load_trajectory_file(traj_path)

    frame_only = np.sum(calculate_asa(t[0]))  # calculate only frame 0
    from_all_frames = np.sum(calculate_asa(t)[0])  # calculate on all frames
    true_frame_0_asa = 2.859646797180176

    for observed in (frame_only, from_all_frames):
        npt.assert_approx_equal(true_frame_0_asa, observed)
コード例 #9
0
    def test_c_Cluster(self):
        """Run Cluster.py (RMSD + k-centers) and compare the generators it
        writes with the reference generators."""
        # We need to be sure to skip the stochastic k-medoids
        cmd = ("Cluster.py -p {project} -s {stride} rmsd "
               "-a {atomindices} kcenters -d {rmsdcutoff}").format(
                   project=ProjectFn,
                   stride=Stride,
                   atomindices="AtomIndices.dat",
                   rmsdcutoff=RMSDCutoff)
        print(cmd)

        os.system(cmd)

        # Best-effort cleanup of the assignment files. Each file is removed
        # on its own so a missing first file does not leave the second one
        # behind, and only OSError is ignored so that real programming errors
        # (and KeyboardInterrupt) are no longer swallowed.
        for leftover in ('Assignments.h5', 'Assignments.h5.distances'):
            try:
                os.remove(os.path.join(WorkingDir, 'Data', leftover))
            except OSError:
                pass

        G = Trajectory.load_trajectory_file(GensPath)
        r_G = Trajectory.load_trajectory_file(ReferenceDir + '/' + GensPath)
        self.assert_trajectories_equal(G, r_G)
コード例 #10
0
ファイル: remote.py プロジェクト: jimsnyderjr/msmbuilder
def load_gens(gens_fn, conf_fn, metric):
    """Set up a worker by loading the generators into module globals.

    Prepared generators ("pgens") are not necessarily picklable, so they
    cannot be built on the master and pushed to the remote workers. Instead
    each worker loads the generators from disk and prepares them locally.

    Sets the globals METRIC, CONF and PGENS, then flips PREPARED to True.
    """
    from msmbuilder import Trajectory

    global PGENS, CONF, METRIC, PREPARED

    METRIC = metric
    CONF = Trajectory.load_trajectory_file(conf_fn)
    # Load the raw generators and prepare them for `metric` in one step.
    PGENS = metric.prepare_trajectory(
        Trajectory.load_trajectory_file(gens_fn))
    PREPARED = True
コード例 #11
0
ファイル: project.py プロジェクト: chrismichel/msmbuilder
 def _eval_traj_shapes(self):
     """Inspect every trajectory file and return two arrays holding the
     first and second entries of each reported shape (lengths, n_atoms)."""
     conf = self.load_conf()
     total = self.n_trajs
     lengths, n_atoms = np.zeros(total), np.zeros(total)
     for index in xrange(total):
         # JustInspect=True asks the loader for the shape only.
         dims = Trajectory.load_trajectory_file(
             self.traj_filename(index), JustInspect=True, Conf=conf)
         lengths[index], n_atoms[index] = dims[0], dims[1]
     return lengths, n_atoms
コード例 #12
0
 def test_g_GetRandomConfs(self):
     """Random conformations drawn with a fixed seed must match the stored
     reference trajectory."""
     project = Project.load_from(ProjectFn)
     assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

     # Seeding the RNG with 42 gives a predictable stream of random numbers.
     seeded_rng = np.random.RandomState(42)
     drawn = GetRandomConfs.run(
         project, assignments, NumRandomConformations, seeded_rng)

     reference_path = os.path.join(ReferenceDir, "2RandomConfs.lh5")
     expected = Trajectory.load_trajectory_file(reference_path)
     self.assert_trajectories_equal(expected, drawn)
コード例 #13
0
ファイル: Assign.py プロジェクト: chrismichel/msmbuilder
def main(args, metric):
    """Assign the project's data to the generators and write the results
    into ``args.output_dir``."""
    out_dir = args.output_dir
    assignments_path = os.path.join(out_dir, "Assignments.h5")
    distances_path = os.path.join(out_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    generators = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(
        metric, project, generators, assignments_path, distances_path)

    logger.info('All Done!')
コード例 #14
0
 def _eval_traj_shapes(self):
     """Return ``(lengths, n_atoms)``: per-trajectory arrays filled from the
     first two entries of the shape reported for each trajectory file."""
     count = self.n_trajs
     lengths = np.zeros(count)
     n_atoms = np.zeros(count)
     reference_conf = self.load_conf()
     for k in xrange(count):
         # Only the shape is requested (JustInspect=True).
         reported = Trajectory.load_trajectory_file(
             self.traj_filename(k), JustInspect=True, Conf=reference_conf)
         lengths[k] = reported[0]
         n_atoms[k] = reported[1]
     return lengths, n_atoms
コード例 #15
0
    def test_g_GetRandomConfs(self):
        """Compare seeded random conformations against the reference file."""
        loaded_project = Project.load_from(ProjectFn)
        fixed_assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

        # A RandomState seeded with 42 makes the selection reproducible.
        rng = np.random.RandomState(42)
        picked = GetRandomConfs.run(
            loaded_project, fixed_assignments, NumRandomConformations, rng)

        expected = Trajectory.load_trajectory_file(
            os.path.join(ReferenceDir, "2RandomConfs.lh5"))
        self.assert_trajectories_equal(expected, picked)
コード例 #16
0
    def test_c_Cluster(self):
        """Cluster with RMSD + k-centers via Cluster.py and check the
        resulting generators against the reference copy."""
        # We need to be sure to skip the stochastic k-medoids
        cmd = ("Cluster.py -p {project} -s {stride} rmsd "
               "-a {atomindices} kcenters -d {rmsdcutoff}").format(
                   project=ProjectFn,
                   stride=Stride,
                   atomindices="AtomIndices.dat",
                   rmsdcutoff=RMSDCutoff)
        print(cmd)

        os.system(cmd)

        # Remove the assignment files if they exist. The files are handled
        # one at a time, and only OSError is ignored: the former bare
        # ``except`` skipped the second file whenever the first removal
        # failed and also hid unrelated errors.
        data_dir = os.path.join(WorkingDir, 'Data')
        for filename in ('Assignments.h5', 'Assignments.h5.distances'):
            try:
                os.remove(os.path.join(data_dir, filename))
            except OSError:
                pass

        G = Trajectory.load_trajectory_file(GensPath)
        r_G = Trajectory.load_trajectory_file(ReferenceDir + '/' + GensPath)
        self.assert_trajectories_equal(G, r_G)
コード例 #17
0
def test_gpurmsd():
    """GPURMSD and RMSD must give the same frame-0-to-all distances
    (to 4 decimals) on the test trajectory."""
    traj = Trajectory.load_trajectory_file(trj_path)

    gpu_metric = GPURMSD()
    gpu_prepared = gpu_metric.prepare_trajectory(traj)
    gpu_metric._gpurmsd.print_params()
    gpu_distances = gpu_metric.one_to_all(gpu_prepared, gpu_prepared, 0)

    cpu_metric = RMSD()
    cpu_prepared = cpu_metric.prepare_trajectory(traj)
    cpu_distances = cpu_metric.one_to_all(cpu_prepared, cpu_prepared, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
コード例 #18
0
ファイル: test_gpurmsd.py プロジェクト: AgnesHH/msmbuilder
def test_gpurmsd():
    """Check that the GPU and CPU RMSD implementations agree to 4 decimals
    on the distances from frame 0 to every frame."""
    traj = Trajectory.load_trajectory_file(trj_path)

    # GPU side (also prints the GPU parameters).
    on_gpu = GPURMSD()
    prepared_for_gpu = on_gpu.prepare_trajectory(traj)
    on_gpu._gpurmsd.print_params()
    gpu_distances = on_gpu.one_to_all(prepared_for_gpu, prepared_for_gpu, 0)

    # CPU side, prepared from the same loaded trajectory.
    on_cpu = RMSD()
    prepared_for_cpu = on_cpu.prepare_trajectory(traj)
    cpu_distances = on_cpu.one_to_all(prepared_for_cpu, prepared_for_cpu, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
コード例 #19
0
ファイル: test_asa.py プロジェクト: jimsnyderjr/msmbuilder
def test_asa_3():
    """Compare calculate_asa on trj0.xtc with the stored g_sas reference
    values, to 2 decimals."""
    reference_values = np.loadtxt(
        os.path.join(reference_dir(), 'g_sas_ref.dat'))
    native = Trajectory.load_from_pdb(
        os.path.join(fixtures_dir(), 'native.pdb'))

    xtc_path = os.path.join(fixtures_dir(), 'trj0.xtc')
    traj = Trajectory.load_trajectory_file(xtc_path, Conf=native)
    computed = calculate_asa(traj, probe_radius=0.14, n_sphere_points=960)

    # the algorithm used by gromacs' g_sas is slightly different than the one
    # used here, so the results are not exactly the same -- see the comments
    # in src/python/geometry/asa.py or the readme file src/ext/asa/README.txt
    # for details
    npt.assert_array_almost_equal(computed, reference_values, decimal=2)
コード例 #20
0
ファイル: test_cfep.py プロジェクト: jimsnyderjr/msmbuilder
 def setUp(self):
     """Load the cfep reference generators and counts and set the default
     attributes used by the tests."""
     data_dir = os.path.join(reference_dir(), 'cfep_reference/')

     self.generators = Trajectory.load_trajectory_file(data_dir + 'Gens.lh5')
     n_generators = len(self.generators)

     self.counts = io.mmread(data_dir + 'tCounts.mtx')
     self.lag_time = 1.0
     # Unseeded random values, one per generator.
     self.pfolds = np.random.rand(n_generators)
     self.rescale = False

     # NOTE(review): product equals the number of generators, i.e. one past
     # the last 0-based index -- confirm this is intended.
     self.reactant, self.product = 0, n_generators
コード例 #21
0
ファイル: Assign.py プロジェクト: raviramanathan/msmbuilder
def main(args, metric):
    """Assign the project's trajectories to the generators.

    Creates ``args.output_dir`` if needed, loads the project and generators,
    and runs ``assign_with_checkpoint``, which writes ``Assignments.h5`` and
    ``Assignments.h5.distances`` into that directory.

    Raises
    ------
    ValueError
        If `metric` is an RMSD metric and the generators do not contain
        exactly ``len(metric.atomindices)`` atoms.

    Notes
    -----
    For RMSD metrics, ``metric.atomindices`` is set to None as a side effect.
    """
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    #arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        # makedirs (not mkdir) so that a nested output directory whose
        # parents do not exist yet can be created as well.
        os.makedirs(args.output_dir)

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    if not isinstance(metric, metrics.RMSD):
        assign_with_checkpoint(metric, project, gens, assignments_path,
                               distances_path)
        logger.info('All Done!')
        return

    # this is really bad design, and we're going to fix it soon in
    # MSMBuilder3, but here's the deal. When Cluster.py loads up the
    # trajectories (Cluster.py:load_trajectories()), it only loads the
    # required indices for RMSD. This means that when it saves the Gens
    # file, that file contains only a subset of the atoms. So when
    # we run *this* script, we need to perform a restricted load of the
    # the trajectories on disk, but we need to NOT perform a restricted
    # load of the gens.lh5 file. (By restricted load, I mean loading
    # only a subset of the data in the file)
    n_gens, n_gen_atoms = gens['XYZList'].shape[0], gens['XYZList'].shape[1]
    if n_gen_atoms != len(metric.atomindices):
        msg = ('Using RMSD clustering/assignment, this script expects '
               'that the Cluster.py script saves a generators file that '
               'only contains the indices of the atoms of interest, and '
               'not any of the superfluous degrees of freedom that were '
               'not used for clustering. But you supplied %d cluster '
               'centers each containg %d atoms. Your atom indices file '
               'on the other hand contains %d atoms') \
                % (n_gens, n_gen_atoms, len(metric.atomindices))
        raise ValueError(msg)

    # now that we're telling the assign function only to load up a
    # subset of the atoms, an the generator is already only a subset,
    # the actual RMSD object needs to, from ITS perspective, operate on
    # every degree of freedom. So it shouldn't be aware of any special
    # atom_indices
    atom_indices = metric.atomindices
    metric.atomindices = None
    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path, atom_indices_to_load=atom_indices)

    logger.info('All Done!')
コード例 #22
0
ファイル: test_cfep.py プロジェクト: jimsnyderjr/msmbuilder
    def setUp(self):
        """Prepare the cfep fixtures: reference generators, count matrix and
        default test attributes."""
        fixture_dir = os.path.join(reference_dir(), 'cfep_reference/')
        gens_file = fixture_dir + 'Gens.lh5'
        counts_file = fixture_dir + 'tCounts.mtx'

        self.generators = Trajectory.load_trajectory_file(gens_file)
        N = len(self.generators)

        self.counts = io.mmread(counts_file)
        self.lag_time = 1.0
        # One unseeded random value per generator.
        self.pfolds = np.random.rand(N)
        self.rescale = False

        self.reactant = 0
        # NOTE(review): N is one past the last 0-based generator index --
        # confirm that this is the intended product state.
        self.product = N
コード例 #23
0
def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices):
    """Return the aligned LPRMSD distances between frame 0 of `pdb` and the
    frames of the trajectory stored in `traj_fn`.

    The prepared coordinate shapes and the distances are printed on the way.
    """
    #project = Project.load_from_hdf(options.projectfn)
    traj = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf)

    # you could replace this with your own metric if you like
    metric = LPRMSD(atom_indices, permute_indices, alt_indices)
    prepared_pdb = metric.prepare_trajectory(pdb)
    prepared_traj = metric.prepare_trajectory(traj)

    for prepared in (prepared_pdb, prepared_traj):
        print(prepared['XYZList'].shape)

    # The second return value is not needed here.
    distances, _unused = metric.one_to_all_aligned(
        prepared_pdb, prepared_traj, 0)
    print(distances)
    return distances
コード例 #24
0
ファイル: CalculateLPRMSD.py プロジェクト: AgnesHH/msmbuilder
def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices):
    """Compute aligned LPRMSD distances from frame 0 of `pdb` to every frame
    of the trajectory file `traj_fn`, printing shapes and distances."""
    #project = Project.load_from_hdf(options.projectfn)
    loaded = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf)

    # you could replace this with your own metric if you like
    lprmsd = LPRMSD(atom_indices, permute_indices, alt_indices)
    reference = lprmsd.prepare_trajectory(pdb)
    target = lprmsd.prepare_trajectory(loaded)

    print(reference['XYZList'].shape)
    print(target['XYZList'].shape)

    # one_to_all_aligned returns a pair; only the first element is used.
    result = lprmsd.one_to_all_aligned(reference, target, 0)
    distances, _ = result
    print(distances)
    return distances
コード例 #25
0
def test():
    """Manual smoke test of the variable-coordinate cfep code.

    Reads its input from a hard-coded developer directory, evaluates the
    partition functions, runs the optimisation and plots the result.
    """
    from msmbuilder import Trajectory
    from scipy import io

    print("Testing cfep code....")

    test_dir = '/Users/TJ/Programs/msmbuilder.sandbox/tjlane/cfep/'

    generators = Trajectory.load_trajectory_file(test_dir + 'Gens.lh5')
    counts = io.mmread(test_dir + 'tCounts.mtx')
    reactant, product = 0, 10598  # generators w/max RMSD and w/min RMSD
    # Only used by the disabled CutCoordinate checks below.
    pfolds = np.loadtxt(test_dir + 'FCommittors.dat')

    # test the usual coordinate
    #pfold_cfep = CutCoordinate(counts, generators, reactant, product)
    #pfold_cfep.set_coordinate_values(pfolds)
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_eigvector2()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_committors()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    # test the Variable Coordinate
    initial_weights = np.ones((1225, 26104))

    contact_cfep = VariableCoordinate(
        contact_reaction_coordinate, initial_weights,
        counts, generators, reactant, product)

    contact_cfep.evaluate_partition_functions()
    for partition_function in (contact_cfep.zh, contact_cfep.zc):
        print(partition_function)

    contact_cfep.optimize()
    print("Finished optimization")

    contact_cfep.plot()
コード例 #26
0
ファイル: cfep.py プロジェクト: chrismichel/msmbuilder
def test():
    """Exercise VariableCoordinate end to end on a developer-local data set:
    evaluate the partition functions, optimise, then plot."""
    from msmbuilder import Trajectory
    from scipy import io

    print("Testing cfep code....")

    test_dir = '/Users/TJ/Programs/msmbuilder.sandbox/tjlane/cfep/'
    gens_path = test_dir + 'Gens.lh5'
    counts_path = test_dir + 'tCounts.mtx'
    committors_path = test_dir + 'FCommittors.dat'

    generators = Trajectory.load_trajectory_file(gens_path)
    counts = io.mmread(counts_path)
    reactant = 0  # generator w/max RMSD
    product = 10598  # generator w/min RMSD
    # Loaded for the disabled CutCoordinate checks kept below.
    pfolds = np.loadtxt(committors_path)

    # test the usual coordinate
    #pfold_cfep = CutCoordinate(counts, generators, reactant, product)
    #pfold_cfep.set_coordinate_values(pfolds)
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_eigvector2()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    #pfold_cfep.set_coordinate_as_committors()
    #print pfold_cfep.reaction_coordinate_values
    #pfold_cfep.plot()

    # test the Variable Coordinate
    weights_shape = (1225, 26104)
    initial_weights = np.ones(weights_shape)

    contact_cfep = VariableCoordinate(contact_reaction_coordinate,
                                      initial_weights, counts, generators,
                                      reactant, product)

    contact_cfep.evaluate_partition_functions()
    print(contact_cfep.zh)
    print(contact_cfep.zc)

    contact_cfep.optimize()
    print("Finished optimization")

    contact_cfep.plot()
コード例 #27
0
ファイル: Assign.py プロジェクト: jimsnyderjr/msmbuilder
def main():
    """Command-line entry point: parse the arguments, make sure the output
    directory exists and assign the project's data to the generators."""
    description = """
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use 
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a 
microstate in a rectangular array of ints. Assignments.h5.distances is an 
array of real numbers of the same dimension containing the distance (according 
to whichever metric you choose) from each frame to to the medoid of the 
microstate it is assigned to."""
    generators_help = '''Output trajectory file containing
        the structures of each of the cluster centers. Note that for hierarchical clustering
        methods, this file will not be produced.'''

    # (formatter_class=argparse.RawDescriptionHelpFormatter is not passed.)
    parser = arglib.ArgumentParser(description=description, get_metric=True)
    parser.add_argument('project')
    parser.add_argument(dest='generators',
                        help=generators_help,
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')

    args, metric = parser.parse_args()
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    assignments_path = os.path.join(output_dir, "Assignments.h5")
    distances_path = os.path.join(output_dir, "Assignments.h5.distances")
    project = Project.load_from(args.project)
    generators = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, generators, assignments_path,
                           distances_path)

    logger.info('All Done!')
コード例 #28
0
def run(project, pdb, metric, traj_fn=None):
    """Compute distances from `pdb` (index 0) to trajectory frames.

    Parameters
    ----------
    project : project object
        Used only when `traj_fn` is None; supplies ``n_trajs``,
        ``traj_lengths`` and ``load_traj(i)``.
    pdb : trajectory
        Reference structure, prepared with `metric`.
    metric : distance metric
        Provides ``prepare_trajectory`` and ``one_to_all``.
    traj_fn : str, optional
        Path of a single trajectory file to measure instead of the project.

    Returns
    -------
    distances
        With `traj_fn`: whatever ``metric.one_to_all`` returns for that
        trajectory. Without it: a ``(n_trajs, max_length)`` array with one
        row per project trajectory, padded with -1 past each row's end.
    """
    ppdb = metric.prepare_trajectory(pdb)

    if traj_fn is not None:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)
        return metric.one_to_all(ppdb, ptraj, 0)

    # np.ones needs an integer shape; the cast is a no-op for integer
    # lengths and guards against lengths that happen to be stored as floats.
    longest = int(np.max(project.traj_lengths))
    distances = -1 * np.ones((project.n_trajs, longest))

    for i in xrange(project.n_trajs):
        logger.info("Working on Trajectory %d", i)
        ptraj = metric.prepare_trajectory(project.load_traj(i))
        d = metric.one_to_all(ppdb, ptraj, 0)
        # Shorter trajectories keep the -1 padding in the rest of the row.
        distances[i, 0:len(d)] = d

    return distances
コード例 #29
0
ファイル: distance.py プロジェクト: mlawrenz/AnaProtLigand
def main(input, atoms):
    """Write per-frame distances for every atom pair listed in `atoms`.

    Parameters
    ----------
    input : str
        Path of the trajectory file to load. The part before '.lh5' is used
        as the prefix of the output file names.
    atoms : str
        Path of a text file read with ``numpy.loadtxt``; each row holds two
        integer atom numbers, to which 1 is added before they are matched
        against the trajectory's 'AtomID' entries.

    For each pair, writes '<prefix>.<name1>.<name2>.dat' holding one value
    per frame: the norm of the coordinate difference multiplied by 10.
    NOTE(review): the factor 10 is presumably a nm -> Angstrom conversion --
    confirm.
    """
    contacts = numpy.loadtxt(atoms, dtype=int, ndmin=2)
    print(contacts.shape)
    if contacts.shape[0] == 0:
        # No pairs: nothing to load or write (same as the original loop
        # never running).
        return

    # The trajectory does not depend on the pair, so load it once instead of
    # re-reading the file from disk for every row of `contacts`.
    t = Trajectory.load_trajectory_file(input)
    xyz = t['XYZList']
    prefix = input.split('.lh5')[0]

    for pair in contacts:
        atom1 = int(pair[0]) + 1
        atom2 = int(pair[1]) + 1
        index1 = numpy.where(t['AtomID'] == atom1)[0]
        index2 = numpy.where(t['AtomID'] == atom2)[0]
        # Single-argument form prints "a b", like the original statement.
        print('%s %s' % (t['ResidueNames'][index1], t['AtomNames'][index1]))
        name1 = 'r%s%s' % (t['ResidueNames'][index1][0],
                           t['AtomNames'][index1][0])
        print('%s %s' % (t['ResidueNames'][index2], t['AtomNames'][index2]))
        name2 = 'r%s%s' % (t['ResidueNames'][index2][0],
                           t['AtomNames'][index2][0])
        dist = [
            linalg.norm(numpy.subtract(xyz[frame][index1],
                                       xyz[frame][index2])) * 10
            for frame in range(xyz.shape[0])
        ]
        numpy.savetxt('%s.%s.%s.dat' % (prefix, name1, name2), dist)
コード例 #30
0
ファイル: testing.py プロジェクト: chrismichel/msmbuilder
def load(filename):
    """Load `filename` with a loader chosen from its name or extension.

    The checks run in this order:

    * '.lh5' / '.pdb' -> ``Trajectory.load_trajectory_file``
    * name contains 'AtomIndices.dat' -> ``np.loadtxt`` with integer dtype
    * '.dat' -> ``np.loadtxt`` (floats)
    * name contains 'ProjectInfo.yaml' or 'ProjectInfo.h5'
      -> ``Project.load_from``
    * '.h5' / '.hdf', or name ends with '.h5.distances' -> ``io.loadh``
    * '.mtx' -> ``scipy.io.mmread``

    Raises
    ------
    TypeError
        If none of the rules above matches.
    """
    # delay these imports, since this module is loaded in a bunch
    # of places but not necessarily used
    import scipy.io
    from msmbuilder import Trajectory, io, Project

    # the filename extension
    ext = os.path.splitext(filename)[1]

    # load trajectories
    if ext in ['.lh5', '.pdb']:
        val = Trajectory.load_trajectory_file(filename)

    # load flat text files
    elif 'AtomIndices.dat' in filename:
        # try loading AtomIndices first, because the default for loadtxt
        # is to use floats. The builtin int is used because the np.int alias
        # (which was the builtin int) has been removed from NumPy.
        val = np.loadtxt(filename, dtype=int)
    elif ext in ['.dat']:
        # try loading general .dats with floats
        val = np.loadtxt(filename)

    # short circuit opening ProjectInfo (must come before the generic .h5
    # rule, which would otherwise catch ProjectInfo.h5)
    elif ('ProjectInfo.yaml' in filename) or ('ProjectInfo.h5' in filename):
        val = Project.load_from(filename)

    # load with serializer files that end with .h5, .hdf or .h5.distances
    elif ext in ['.h5', '.hdf'] or filename.endswith('.h5.distances'):
        val = io.loadh(filename, deferred=False)

    # load matrices
    elif ext in ['.mtx']:
        val = scipy.io.mmread(filename)

    else:
        raise TypeError("I could not infer how to load this file. You "
            "can either request load=False, or perhaps add more logic to "
            "the load heuristics in this class: %s" % filename)

    return val
コード例 #31
0
def run(project, pdb, metric, traj_fn=None):
    """Measure distances from `pdb` (index 0) to trajectory frames.

    Parameters
    ----------
    project : project object
        Consulted only when `traj_fn` is None, through ``n_trajs``,
        ``traj_lengths`` and ``load_traj(i)``.
    pdb : trajectory
        Reference structure; it is prepared with `metric`.
    metric : distance metric
        Must offer ``prepare_trajectory`` and ``one_to_all``.
    traj_fn : str, optional
        A single trajectory file to use in place of the whole project.

    Returns
    -------
    distances
        For a single file, the result of ``metric.one_to_all``. For a
        project, an array of shape ``(n_trajs, max_length)`` in which
        positions past the end of a trajectory hold -1.
    """
    ppdb = metric.prepare_trajectory(pdb)

    # Identity test: `== None` relies on the argument's __eq__.
    if traj_fn is None:
        # Cast to int because np.ones requires an integer shape; this is a
        # no-op when the lengths are already integers.
        max_length = int(np.max(project.traj_lengths))
        distances = -1 * np.ones((project.n_trajs, max_length))

        for i in xrange(project.n_trajs):
            logger.info("Working on Trajectory %d", i)
            ptraj = metric.prepare_trajectory(project.load_traj(i))
            d = metric.one_to_all(ppdb, ptraj, 0)
            # The unused tail of the row keeps its -1 padding.
            distances[i, 0:len(d)] = d
    else:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)

        distances = metric.one_to_all(ppdb, ptraj, 0)

    return distances
コード例 #32
0
def test_lprmsd():
    """LPRMSD, with and without alternate indices, must reproduce the plain
    RMSD frame-0-to-all distances on the same atom subset."""
    t = Trajectory.load_trajectory_file('trj0.lh5')
    atom_subset = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    def distances_from_first_frame(metric):
        """Prepare `t` for `metric` and measure frame 0 against all frames."""
        prepared = metric.prepare_trajectory(t)
        return metric.one_to_all(prepared, prepared, 0)

    dists = distances_from_first_frame(
        LPRMSD(atomindices=atom_subset, debug=True))
    dists_alt = distances_from_first_frame(
        LPRMSD(atomindices=atom_subset, altindices=atom_subset, debug=True))
    ref_dists = distances_from_first_frame(RMSD(atomindices=atom_subset))

    for candidate in (dists, dists_alt):
        npt.assert_array_almost_equal(candidate, ref_dists)
コード例 #33
0
ファイル: TestLPRMSD_00.py プロジェクト: AgnesHH/msmbuilder
def test_lprmsd():
    """Check both LPRMSD variants (plain and with altindices) against RMSD
    on a fixed set of atom indices."""
    t = Trajectory.load_trajectory_file('trj0.lh5')
    selected = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    # LPRMSD restricted to the selected atoms.
    plain_metric = LPRMSD(atomindices=selected, debug=True)
    plain_prepared = plain_metric.prepare_trajectory(t)
    dists = plain_metric.one_to_all(plain_prepared, plain_prepared, 0)

    # Same, with the alternate indices set to the same atoms.
    alt_metric = LPRMSD(atomindices=selected, altindices=selected, debug=True)
    alt_prepared = alt_metric.prepare_trajectory(t)
    dists_alt = alt_metric.one_to_all(alt_prepared, alt_prepared, 0)

    # Reference values from the ordinary RMSD metric.
    reference_metric = RMSD(atomindices=selected)
    reference_prepared = reference_metric.prepare_trajectory(t)
    ref_dists = reference_metric.one_to_all(
        reference_prepared, reference_prepared, 0)

    npt.assert_array_almost_equal(dists, ref_dists)
    npt.assert_array_almost_equal(dists_alt, ref_dists)
コード例 #34
0
ファイル: Assign.py プロジェクト: jimsnyderjr/msmbuilder
def main():
    """Command-line entry point: assign project frames to cluster generators.

    Parses the project file, generators file and output directory, creates
    the output directory if needed, and delegates to
    `assign_with_checkpoint`, passing it the paths of the assignments and
    distances files inside the output directory.
    """
    description = """
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use 
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a 
microstate in a rectangular array of ints. Assignments.h5.distances is an 
array of real numbers of the same dimension containing the distance (according 
to whichever metric you choose) from each frame to to the medoid of the 
microstate it is assigned to."""
    generators_help = '''Output trajectory file containing
        the structures of each of the cluster centers. Note that for hierarchical clustering
        methods, this file will not be produced.'''

    parser = arglib.ArgumentParser(description=description, get_metric=True)
    parser.add_argument('project')
    parser.add_argument(dest='generators', help=generators_help,
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')

    args, metric = parser.parse_args()

    out_dir = args.output_dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    assignments_path = os.path.join(out_dir, "Assignments.h5")
    distances_path = os.path.join(out_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # Runs the assignment and writes the results to disk.
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path)

    logger.info('All Done!')
コード例 #35
0
def run(traj_dir, conf_filename, project_filename):
    """Rebuild a project file from the ``trj*.lh5`` files in `traj_dir`.

    Parameters
    ----------
    traj_dir : str
        Directory that is globbed for ``trj*.lh5`` files.
    conf_filename : str
        Stored in the project record under "conf_filename".
    project_filename : str
        Path the rebuilt project is saved to.
    """
    logger.info("Rebuilding project.")

    # Natural ordering of the trajectory files, via utils.keynat.
    traj_paths = sorted(glob.glob(traj_dir + "/trj*.lh5"), key=utils.keynat)
    num_traj = len(traj_paths)

    # The first element returned by a JustInspect load is recorded as the
    # trajectory length.
    traj_lengths = np.zeros(num_traj, 'int')
    for index, path in enumerate(traj_paths):
        inspected = Trajectory.load_trajectory_file(path, JustInspect=True)
        traj_lengths[index] = inspected[0]

    project = Project({
        "conf_filename": conf_filename,
        "traj_lengths": traj_lengths,
        "traj_paths": traj_paths,
        "traj_errors": [None] * num_traj,
        "traj_converted_from": [[] for _ in xrange(num_traj)],
    })
    project.save(project_filename)
    logger.info("Wrote %s" % project_filename)
コード例 #36
0
    Returns
    -------
    centers : np.ndarray, shape=(n_frames, 3)
        The mean position in each frame that was subtracted from each
        atom
    """
    centers = np.zeros((xyzlist.shape[0], xyzlist.shape[2]))
    
    for i in xrange(xyzlist.shape[0]):
        X = xyzlist[i].astype(np.float64)
        centers[i] = X.mean(0)
        X -= centers[i]
        xyzlist[i] = X

    return centers
    
if __name__ == '__main__':
    # Demo: center the first 320 atoms of every frame of 'short_traj.lh5',
    # align each frame with align_to_moments, and print the rotations.
    from msmbuilder import Trajectory

    traj = Trajectory.load_trajectory_file('short_traj.lh5')
    xyz = traj['XYZList'][:, :320, :]

    centers = center(xyz)
    n_frames = len(xyz)
    rotations = np.zeros((n_frames, 3, 3))

    for idx in range(n_frames):
        aligned, rotation = align_to_moments(xyz[idx])
        xyz[idx] = aligned
        rotations[idx] = rotation

    # Blank line, then the stack of per-frame rotation matrices.
    print('')
    print(rotations)
コード例 #37
0
ファイル: fahproject.py プロジェクト: lilipeng/msmbuilder
    def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
						 max_rmsd, min_gens, center_conformations, memory_check,
						 omp_parallel_rmsd=True):
        """
        Merge the XTC files found in a CLONE directory into one H5 trajectory.

        Depending on `memory_check` and the contents of ``self.memory``, the
        frames are either written to a new ``trj<trajectory_number>.h5`` file
        in `output_dir` or appended to the file recorded by a previous
        conversion of the same `clone_dir`.

        Parameters
        ----------
        clone_dir : str
            the directory in which the xtc files are found. All of the xtc files
            in this directory are joined together to make a single trajectory
            (.h5) output file

        output_dir : str
            directory where the outputted files will be placed

        trajectory_number : int
            A unique number for this trajectory. This number is used in
            constructing the filename to write the outputted .h5 trajectory to,
            and thus must be unique

        stride: int
            Subsample by only considering every Nth snapshot.
        max_rmsd: {int, None}
            if this value is not None, calculate the RMSD to the pdb_file from
            each snapshot and reject trajectories which have snapshots with RMSD
            greater than max_rmsd. If None, no check is performed

        min_gens : int
            Discard the trajectories that contain fewer than `min_gens` XTC files.
            NOTE(review): the check in the body uses ``<=``, so a clone with
            exactly `min_gens` files is skipped as well.

        center_conformations : bool
            center conformations before saving.

        memory_check : bool
            if yes, uses the memory dictionary to do an update rather than a
            complete re-convert.

        omp_parallel_rmsd : bool
            If true, use OpenMP accelerated RMSD calculation for max_rmsd check

        Returns
        -------
        None
            Every exit path returns None. On success ``self.memory[clone_dir]``
            is set to ``[output_file_path, num_xtcs_processed]``.
        """

        xtc_files = self.list_xtcs_in_dir(clone_dir)

        # Ensure that we're only joining contiguously numbered xtc files -- starting at 0 --
        # into a trajectory. If there are gaps in the xtc files in the directory, we only
        # want to use the the ones such that they are contiguously numbered
        i = 0
        for i, filename in enumerate(xtc_files):
            if self.integer_component(filename) != i:
                logger.error("Found discontinuity in xtc numbering - check data in %s", clone_dir)
                xtc_files = xtc_files[0:i]
                break


        # check the memory object to see which xtc files have already been converted, and
        # exclude those from this conversion
        if memory_check:
            if clone_dir in self.memory.keys():
                previous_convert_exists = True
                # memory entries are [output_file_path, num_xtcs_processed]
                num_xtcs_converted = self.memory[clone_dir][1]
                if len(xtc_files) == num_xtcs_converted: # if we have converted everything,
                    logger.info("Already converted all files in %s, skipping...", clone_dir)
                    return                               # just bail out
                else:
                    # keep only the files that were not part of the earlier conversion
                    xtc_files = xtc_files[num_xtcs_converted:]
            else:
                previous_convert_exists = False
        else:
            previous_convert_exists = False

        xtc_file_paths = [os.path.join(clone_dir, f) for f in xtc_files]

        logger.info("Processing %d xtc files in clone_dir = %s", len(xtc_files), clone_dir)

        # NOTE(review): when extending a previous conversion, xtc_files holds
        # only the new files here, so this threshold applies to the new files
        # alone -- confirm that is intended.
        if len(xtc_files) <= min_gens:
            logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
            logger.info("Too few xtc files (generations).")
            return

        try:
            # [this should check for and discard overlapping snapshots]
            trajectory = Trajectory.load_from_xtc(xtc_file_paths, PDBFilename=self.pdb_topology,
                                                discard_overlapping_frames=True)
        except IOError as e:
            logger.error("IOError (%s) when processing trajectory in clone_dir = %s", e, clone_dir)
            logger.error("Attempting rescue by disregarding final frame, which is often")
            logger.error("the first/only frame to be corrupted")

            if len(xtc_file_paths) == 1:
                logger.error("Didn't find any other frames in %s, continuing...", clone_dir)
                return

            try:
                # Retry without the last xtc file.
                # NOTE(review): unlike the first attempt, this call does not pass
                # discard_overlapping_frames=True.
                trajectory = Trajectory.load_from_xtc(xtc_file_paths[0:-1], PDBFilename=self.pdb_topology)
            except IOError:
                logger.error("Unfortunately, the error remained even after ignoring the final frame.")
                logger.error("Skipping the trajectory in clone_dir = %s", clone_dir)
                return
            else:
                logger.error("Sucessfully recovered from IOError by disregarding final frame.")

        if max_rmsd is not None:
            # AtomID values are converted to ints and shifted down by one to
            # serve as atom indices for the RMSD metric.
            atomindices = [ int(i)-1 for i in trajectory['AtomID'] ]
            rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
            ppdb = rmsdmetric.prepare_trajectory(Trajectory.load_trajectory_file(self.pdb_topology))
            ptraj = rmsdmetric.prepare_trajectory(trajectory)
            # RMSD from frame 0 of the topology PDB to every loaded snapshot
            rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

            if max(rmsds) > max_rmsd:
                # `argmax` is not defined in this method; presumably imported at
                # module level (e.g. from numpy) -- TODO confirm.
                logger.warning("Snapshot %d RMSD %f > the %f cutoff" , argmax(rmsds), max(rmsds), max_rmsd)
                logger.warning("Dropping trajectory")
                return

        if center_conformations:
            RMSD.TheoData.centerConformations(trajectory["XYZList"])

        # if we are adding to a previous trajectory, we have to load that traj up and extend it
        if previous_convert_exists:
            output_filename = self.memory[clone_dir][0]
            output_file_path = output_filename
            logger.info("Extending: %s", output_filename)
            assert os.path.exists( output_filename )

            # load the traj and extend it [this should check for and discard overlapping snapshots]
            Trajectory.append_frames_to_file( output_filename,
                                           trajectory['XYZList'][::stride],
                                           discard_overlapping_frames=True )

            # new files processed now plus the count from the earlier conversion
            num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]

        # if we are not adding to a traj, then we create a new one
        else:
            output_filename = 'trj%s.h5' % trajectory_number
            output_file_path = os.path.join(output_dir, output_filename)

            if os.path.exists(output_file_path):
                logger.info("The file name %s already exists. Skipping it.", output_file_path)
                return

            # stride and discard by snapshot
            trajectory['XYZList'] = trajectory['XYZList'][::stride]
            trajectory.save(output_file_path)

            num_xtcs_processed = len(xtc_file_paths)

        # log what we did into the memory object
        self.memory[clone_dir] = [ output_file_path, num_xtcs_processed ]

        return
コード例 #38
0
ファイル: project.py プロジェクト: chrismichel/msmbuilder
 def load_traj(self, trj_index, stride=1, atom_indices=None):
     """Load one of the project's trajectories from disk.

     The file name is looked up with ``self.traj_filename(trj_index)``;
     `stride` and `atom_indices` are forwarded to the loader as ``Stride``
     and ``AtomIndices``.
     """
     traj_path = self.traj_filename(trj_index)
     load_options = dict(Stride=stride, AtomIndices=atom_indices)
     return Trajectory.load_trajectory_file(traj_path, **load_options)
コード例 #39
0
import numpy as np
from msmbuilder import Trajectory
from gpurmsd.gpurmsd import GPURMSD
from msmbuilder.metrics import RMSD
import matplotlib.pyplot as pp
import numpy.testing as npt


def fixtures_dir():
    """Return the path of the 'fixtures' directory next to this source file."""
    # Locating the running file through the current frame, see
    # http://stackoverflow.com/questions/50499/in-python-how-do-i-get-the-path-and-name-of-the-file-that-is-currently-executin
    this_file = inspect.getfile(inspect.currentframe())
    here = os.path.dirname(this_file)
    return os.path.join(here, 'fixtures')


# Fixture files used by the tests in this module. Every path is resolved
# relative to the directory returned by fixtures_dir().
trj_path = os.path.join(fixtures_dir(), 'trj0.lh5')
# The 'ww.pdb' conformation is loaded eagerly, at import time.
ww_conf = Trajectory.load_trajectory_file(
    os.path.join(fixtures_dir(), 'ww.pdb'))
# The two xtc fixtures are kept as paths only; they are not loaded here.
ww_1 = os.path.join(fixtures_dir(), 'ww.xtc')
ww_2 = os.path.join(fixtures_dir(), 'ww-aligned.xtc')


def test_gpurmsd():
    """GPU and CPU RMSD must agree on frame-0-to-all distances for trj0.lh5.

    Both metrics are run on the same loaded trajectory and their distance
    arrays are compared; without that comparison the test could only fail
    if one of the calls raised.
    """
    traj = Trajectory.load_trajectory_file(trj_path)

    gpurmsd = GPURMSD()
    gpu_ptraj = gpurmsd.prepare_trajectory(traj)
    gpurmsd._gpurmsd.print_params()
    gpu_distances = gpurmsd.one_to_all(gpu_ptraj, gpu_ptraj, 0)

    cpurmsd = RMSD()
    cpu_ptraj = cpurmsd.prepare_trajectory(traj)
    cpu_distances = cpurmsd.one_to_all(cpu_ptraj, cpu_ptraj, 0)

    # NOTE(review): decimal=4 is an assumed tolerance for differences between
    # the GPU and CPU implementations -- tighten or loosen after confirming
    # the precision the GPU code actually works in.
    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
コード例 #40
0
def run(project, assignments, conformations_per_state, states, output_dir,
        gens_file, atom_indices, permute_indices, alt_indices, total_memory):
    if states == "all":
        states = np.arange(assignments.max() + 1)
    # This is a dictionary: {generator : ((traj1, frame1), (traj1, frame3), (traj2, frame1), ... )}
    inverse_assignments = defaultdict(lambda: [])
    for i in xrange(assignments.shape[0]):
        for j in xrange(assignments.shape[1]):
            inverse_assignments[assignments[i, j]].append((i, j))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    print "Setting up the metric."
    rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices)
    # This trickery allows us to get the correct number of leading
    # zeros in the output file name no matter how many generators we have
    digits = len(str(max(states)))
    # Create a trajectory of generators and prepare it.
    if os.path.exists(gens_file):
        gens_traj = Trajectory.load_trajectory_file(gens_file)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '\"Generator-%%0%ii.pdb\"' % digits

    formstr_xtc = '\"Cluster-%%0%ii.xtc\"' % digits
    print "Loading up the trajectories."
    traj_nfiles, traj_bytes = get_size(project['TrajFilePath'])
    LoadAll = 0
    MaxMem = 0.0
    # LPW This is my hack that decides whether to load trajectories into memory, or to read them from disk.
    if (
            traj_bytes * 5
    ) < total_memory * 1073741824:  # It looks like the Python script uses roughly 5x the HDF file size in terms of memory.
        print "Loading all trajectories into memory."
        LoadAll = 1
        AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])]
        #print "After loading trajectories, memory usage is % .3f GB" % (float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576)

    if not os.path.exists(gens_file):
        if not 'AllTraj' in locals():
            raise Exception((
                'To get away with not supplying a Gens.lh5 structure to align to for each state '
                'you need to have enough memory to load all the trajectories simultaniously. This could be worked around...'
            ))
        print 'Randomly Sampling from state for structure to align everything to'
        centers_list = []
        for s in states:
            chosen = inverse_assignments[np.random.randint(
                len(inverse_assignments[s]))]
            centers_list.append(AllTraj[chosen[0]][chosen[1]])
        gens_traj = concatenate_trajectories(centers_list)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '\"Center-%%0%ii.pdb\"' % digits

    cluster_traj = project.GetEmptyTrajectory()
    # Loop through the generators.
    for s in states:
        if len(inverse_assignments[s]) == 0:
            raise ValueError('No assignments to state! %s' % s)
        if conformations_per_state == 'all':
            confs = inverse_assignments[s]
        else:
            random.shuffle(inverse_assignments[s])
            if len(inverse_assignments[s]) >= conformations_per_state:
                confs = inverse_assignments[s][0:conformations_per_state]
            else:
                confs = inverse_assignments[s]
                print 'Not enough assignments in state %s' % s
        FrameDict = {}
        for (traj, frame) in confs:
            FrameDict.setdefault(traj, []).append(frame)
        # Create a single trajectory corresponding to the frames that
        # belong to the current generator.
        if "XYZList" in cluster_traj:
            cluster_traj.pop("XYZList")
        print "Generator %i" % s,
        TrajNums = set([i[0] for i in confs])
        for i in TrajNums:
            if LoadAll:
                T = AllTraj[i][np.array(FrameDict[i])]
            else:
                T = project.LoadTraj(i)[np.array(FrameDict[i])]
            cluster_traj += T
        print " loaded %i conformations, aligning" % len(cluster_traj),
        # Prepare the trajectory, align to the generator, and reassign the coordinates.
        p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj)
        rmsd, xout = rmsd_metric.one_to_all_aligned(p_gens_traj,
                                                    p_cluster_traj, s)
        p_cluster_traj['XYZList'] = xout.copy()
        # Now save the generator / cluster to a PDB / XTC file.
        outpdb = eval(formstr_pdb) % s
        outxtc = eval(formstr_xtc) % s
        this_gen_traj = p_gens_traj[s]
        print ", saving PDB to %s" % os.path.join(output_dir, outpdb),
        this_gen_traj.save_to_pdb(os.path.join(output_dir, outpdb))
        print ", saving XTC to %s" % os.path.join(output_dir, outxtc),
        p_cluster_traj.save_to_xtc(os.path.join(output_dir, outxtc))
        print ", saved"
        NowMem = float(resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss) / 1048576
        if NowMem > MaxMem:
            MaxMem = NowMem
コード例 #41
0
ファイル: project.py プロジェクト: chrismichel/msmbuilder
 def load_conf(self):
     """Load the structure file named by ``self.conf_filename`` from disk."""
     conf_path = self.conf_filename
     return Trajectory.load_trajectory_file(conf_path)
コード例 #42
0
parser.add_argument('-n','--angles',dest='angles',nargs='+',help='Angles used in calculating the PCs. One or more of [ phi, psi, chi, omega ]. Any order is fine, this script will sort them as msmbuilder.geometry.dihedral sorts them.')
parser.add_argument('-o','--out',dest='outFN',help='Output filename (should be PDF) [ DihedralPC1Weights.pdf ]', default='DihedralPC1Weights.pdf')
parser.add_argument('-N',dest='N',default=0,type=int,help='Which eigenvector to look at.')
parser.add_argument('--double',dest='double',default=False,action='store_true',help='Pass this flag if you used msmbuilder.metrics.Dihedrals, which means there is a sin and cosine entry for each angle')
options = parser.parse_args()
 
import numpy as np
from msmbuilder import io, Trajectory
from msmbuilder import metrics
from msmbuilder.geometry import dihedral
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
import os, sys, re
 
pdb = Trajectory.load_trajectory_file( options.pdbFN )

pca = io.loadh( options.pcaFN )

# Indices that order the eigenvalues from largest to smallest.
decInd = np.argsort( pca['vals'] )[::-1]


# Absolute component weights of the options.N-th eigenvector in that order.
v0 = np.abs(pca['vecs'][:,decInd][:,options.N])
if options.double:
    if v0.shape[0] % 2:
        print("There are an odd number of entries, so --double should not be passed here, or something else has gone wrong.")
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)

    # With --double the vector holds two entries per angle (first half and
    # second half); add the halves to get one weight per angle. Floor
    # division keeps the slice bounds integral.
    n0 = v0.shape[0]
    v0 = v0[:n0 // 2] + v0[n0 // 2:]
コード例 #43
0
def positionalMutualCalculator(dir,assignFile,projectFile,gensFile,atomIndices,states):
    '''
    Compute, per state, the pairwise mutual information between the
    positional vectors of selected atoms. Based on the work of Kraskov,
    McClendon and Lange.

    Parameters
    ----------
    dir : str
        Prefix that is concatenated (plain ``+``) in front of every file name
        below, so it should end with a path separator.
    assignFile : str
        File with the macrostate assignments; read with ``msmbuilder.io.loadh``
        and its 'arr_0' entry is used.
    projectFile : str
        The project file.
    gensFile : str
        Trajectory file with the generators each state is aligned to.
    atomIndices : str
        Text file with the integer atom indices to use.
    states : iterable of int or -1
        States to process; -1 selects every state from 0 to the largest
        assignment value.

    Output
    ------
    One '<state>.dat' file per state, written with ``np.savetxt``, holding
    the symmetric mutual-information matrix. Returns 0.

    Raises
    ------
    IOError
        If the generators file does not exist.
    ValueError
        If a requested state has no assigned frames.
    '''
    import msmbuilder as m
    from msmbuilder import Trajectory
    import numpy as np
    import lprmsd
    import os
    from collections import defaultdict
    from IPython import parallel

    # Setting up the parallel map.
    client_list = parallel.Client(profile='mpi')
    print("Running on: %s" % len(client_list.ids))
    view = client_list.load_balanced_view()

    # Load the atom indices.
    atomIndices = np.atleast_1d(np.loadtxt(dir+atomIndices, int))
    # Map each atom index to its row/column in the output matrix. The
    # previous loop used the atom index itself as an array position.
    atomDict = {}
    for position, atom in enumerate(atomIndices):
        atomDict[atom] = position

    # Load the project and the 'arr_0' array of the assignments file.
    prj = m.Project.load_from(dir+projectFile)
    macroAssignments = m.io.loadh(dir+assignFile)['arr_0']
    macroAssignmentsMax = np.max(macroAssignments)

    # Eventually need to update this so that only certain states are tabulated.
    if -1 == states:
        print("Calculating Mutual Information for all states")
        states = np.arange(macroAssignmentsMax+1)

    # Lee Ping's metric, with None for the permute indices and the
    # alternative atom indices.
    rmsd_metric = lprmsd.LPRMSD(atomIndices, None, None)

    # The generators are required for the alignment below; fail here with a
    # clear message instead of a NameError later on.
    if not os.path.exists(dir+gensFile):
        raise IOError('Generators file not found: %s' % (dir+gensFile))
    gensTraj = Trajectory.load_trajectory_file(dir+gensFile)
    pgenTraj = rmsd_metric.prepare_trajectory(gensTraj)

    # Inverse assignments: {state: [(trjIndex, frmIndex), ...]}
    stateAssignmentDict = defaultdict(list)
    for trjIndex in xrange(macroAssignments.shape[0]):
        for frmIndex in xrange(macroAssignments.shape[1]):
            stateAssignmentDict[macroAssignments[trjIndex,frmIndex]]\
                .append((trjIndex,frmIndex))

    # Number of neighbors.
    k = 6

    for s in states:
        mMat = np.zeros((len(atomIndices), len(atomIndices)))
        print("Calculating MI for state %s"%s)
        if len(stateAssignmentDict[s]) == 0:
            raise ValueError('No Assignments to state %s'%s)
        confs = stateAssignmentDict[s]

        # Group the frames of this state by trajectory.
        FrameDict = {}
        for (traj, frame) in confs:
            FrameDict.setdefault(traj,[]).append(frame)

        # Collect only the frames we want for this state.
        clusterTraj = prj.empty_traj()
        TrajNums = set(i[0] for i in confs)
        for currTrj in TrajNums:
            T = prj.load_traj(currTrj)[np.array(FrameDict[currTrj])]
            clusterTraj += T
        print("Loaded %i conformations"%len(clusterTraj))

        # Align the state's frames to its generator.
        pclusterTraj = rmsd_metric.prepare_trajectory(clusterTraj)
        rmsd, xout = rmsd_metric.one_to_all_aligned(pgenTraj, pclusterTraj, s)
        N = len(xout)
        print(N)
        randomI = np.random.randint(len(xout[0]))

        # Subtract the ensemble average from every frame.
        xout = xout - np.average(xout, axis=0)
        # Sanity check on one random atom: the mean computed by hand must
        # match np.average. The earlier assert tested the bound method
        # `.all` and so could never fail.
        # NOTE(review): atol=1e-6 is an assumed tolerance -- adjust to the
        # precision of the coordinates.
        assert np.allclose(sum(xout[:,randomI])/N,
                           np.average(xout, axis=0)[randomI], atol=1e-6)

        # One job per unordered pair of atoms (including each atom with itself).
        jobs = []
        for indexTracker, atomindexI in enumerate(atomIndices):
            for atomindexJ in atomIndices[indexTracker:]:
                job = (N, k, atomindexI, atomindexJ,
                       np.hstack((xout[:,atomindexI], xout[:,atomindexJ])))
                jobs.append(job)
        results = view.map(mutual_nearest_neighbors, *zip(*jobs))
        all_mutuals = results.get()
        # Each result starts with the two atom indices of the job and ends
        # with the mutual information; fill the matrix symmetrically.
        for result in all_mutuals:
            row = atomDict[result[0]]
            col = atomDict[result[1]]
            print("%s %s" % (row, result[1]))
            mMat[row][col] = mMat[col][row] = result[-1]

        np.savetxt('%s.dat'%s, mMat)

    return 0
コード例 #44
0
 def load_conf(self):
     """Return the structure loaded from the file at ``self.conf_filename``."""
     loader = Trajectory.load_trajectory_file
     return loader(self.conf_filename)
コード例 #45
0
 def load_traj(self, trj_index, stride=1):
     """Load one of the project's trajectories from disk.

     The file name comes from ``self.traj_filename(trj_index)`` and `stride`
     is forwarded to the loader as ``Stride``.
     """
     return Trajectory.load_trajectory_file(
         self.traj_filename(trj_index), Stride=stride)
コード例 #46
0
ファイル: SaveAlignedPDBs.py プロジェクト: baxa/msmbuilder
def run(
    project,
    assignments,
    conformations_per_state,
    states,
    output_dir,
    gens_file,
    atom_indices,
    permute_indices,
    alt_indices,
    total_memory,
):
    if states == "all":
        states = np.arange(assignments.max() + 1)
    # This is a dictionary: {generator : ((traj1, frame1), (traj1, frame3), (traj2, frame1), ... )}
    inverse_assignments = defaultdict(lambda: [])
    for i in xrange(assignments.shape[0]):
        for j in xrange(assignments.shape[1]):
            inverse_assignments[assignments[i, j]].append((i, j))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    print "Setting up the metric."
    rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices)
    # This trickery allows us to get the correct number of leading
    # zeros in the output file name no matter how many generators we have
    digits = len(str(max(states)))
    # Create a trajectory of generators and prepare it.
    if os.path.exists(gens_file):
        gens_traj = Trajectory.load_trajectory_file(gens_file)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '"Generator-%%0%ii.pdb"' % digits

    formstr_xtc = '"Cluster-%%0%ii.xtc"' % digits
    print "Loading up the trajectories."
    traj_nfiles, traj_bytes = get_size(project["TrajFilePath"])
    LoadAll = 0
    MaxMem = 0.0
    # LPW This is my hack that decides whether to load trajectories into memory, or to read them from disk.
    if (
        traj_bytes * 5
    ) < total_memory * 1073741824:  # It looks like the Python script uses roughly 5x the HDF file size in terms of memory.
        print "Loading all trajectories into memory."
        LoadAll = 1
        AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])]
        # print "After loading trajectories, memory usage is % .3f GB" % (float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576)

    if not os.path.exists(gens_file):
        if not "AllTraj" in locals():
            raise Exception(
                (
                    "To get away with not supplying a Gens.lh5 structure to align to for each state "
                    "you need to have enough memory to load all the trajectories simultaniously. This could be worked around..."
                )
            )
        print "Randomly Sampling from state for structure to align everything to"
        centers_list = []
        for s in states:
            chosen = inverse_assignments[np.random.randint(len(inverse_assignments[s]))]
            centers_list.append(AllTraj[chosen[0]][chosen[1]])
        gens_traj = concatenate_trajectories(centers_list)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = '"Center-%%0%ii.pdb"' % digits

    cluster_traj = project.GetEmptyTrajectory()
    # Loop through the generators.
    for s in states:
        if len(inverse_assignments[s]) == 0:
            raise ValueError("No assignments to state! %s" % s)
        if conformations_per_state == "all":
            confs = inverse_assignments[s]
        else:
            random.shuffle(inverse_assignments[s])
            if len(inverse_assignments[s]) >= conformations_per_state:
                confs = inverse_assignments[s][0:conformations_per_state]
            else:
                confs = inverse_assignments[s]
                print "Not enough assignments in state %s" % s
        FrameDict = {}
        for (traj, frame) in confs:
            FrameDict.setdefault(traj, []).append(frame)
        # Create a single trajectory corresponding to the frames that
        # belong to the current generator.
        if "XYZList" in cluster_traj:
            cluster_traj.pop("XYZList")
        print "Generator %i" % s,
        TrajNums = set([i[0] for i in confs])
        for i in TrajNums:
            if LoadAll:
                T = AllTraj[i][np.array(FrameDict[i])]
            else:
                T = project.LoadTraj(i)[np.array(FrameDict[i])]
            cluster_traj += T
        print " loaded %i conformations, aligning" % len(cluster_traj),
        # Prepare the trajectory, align to the generator, and reassign the coordinates.
        p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj)
        rmsd, xout = rmsd_metric.one_to_all_aligned(p_gens_traj, p_cluster_traj, s)
        p_cluster_traj["XYZList"] = xout.copy()
        # Now save the generator / cluster to a PDB / XTC file.
        outpdb = eval(formstr_pdb) % s
        outxtc = eval(formstr_xtc) % s
        this_gen_traj = p_gens_traj[s]
        print ", saving PDB to %s" % os.path.join(output_dir, outpdb),
        this_gen_traj.save_to_pdb(os.path.join(output_dir, outpdb))
        print ", saving XTC to %s" % os.path.join(output_dir, outxtc),
        p_cluster_traj.save_to_xtc(os.path.join(output_dir, outxtc))
        print ", saved"
        NowMem = float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576
        if NowMem > MaxMem:
            MaxMem = NowMem
コード例 #47
0
Please use CalculateProjectDistance.py
===============================================================================
"""
    parser = arglib.ArgumentParser(description="""
Calculate the RMSD between an input PDB and all conformations in your project.
Output as a HDF5 file (load using msmbuilder.io.loadh())
""" + deprecationmessage)
    warnings.warn(deprecationmessage, DeprecationWarning)

    parser.add_argument('pdb')
    parser.add_argument('atom_indices',
                        help='Indices of atoms to compare',
                        default='AtomIndices.dat')
    parser.add_argument('output',
                        help='''Output file name. Output is an
        .h5 file with RMSD entries corresponding to the Assignments.h5 file.''',
                        default='Data/RMSD.h5')
    parser.add_argument('project')
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = Trajectory.load_trajectory_file(args.pdb)
    atom_indices = np.loadtxt(args.atom_indices).astype(int)

    distances = run(project, pdb, atom_indices)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
コード例 #48
0
def compute_crysol(trajectory, save_to=None):
    """
    Compute crysol for all the snapshots in an msmbuilder trajectory.

    Each frame is written to a temporary PDB file inside TEMPDIR, the
    `crysol` command line program is run on it, and the resulting
    intensity file is parsed.

    Parameters
    ----------
    trajectory : msmbuilder.Trajectory.Trajectory or str
        The trajectory to compute SAXS for. If a string is given it is
        treated as a filename and loaded with
        `Trajectory.load_trajectory_file`.

    save_to : str or None
        If this is a string, will save to an h5 file of that name.

    Returns
    -------
    q_values : np.ndarray
        The q_values at which the scattering was computed (first column of
        the crysol intensity file; units are whatever crysol reports).

    scattering_pred : np.ndarray
        The estimated integrated intensity for each `q_value`, one row per
        frame (column index 3 of the crysol intensity file).

    Nothing is returned when `save_to` is given; the two arrays are written
    to that file under the keys `q_values` and `saxs` instead.

    Raises
    ------
    ValueError
        If the trajectory contains no frames.
    IOError
        If crysol did not produce its intensity output file.
    subprocess.CalledProcessError
        If the crysol command exits with a non-zero status.

    Notes
    -----
    The working directory of the process is changed to TEMPDIR and is not
    restored afterwards, so a relative `save_to` is resolved against TEMPDIR.
    """

    setup_tmp_dir()

    if isinstance(trajectory, str):
        trajectory = Trajectory.load_trajectory_file(trajectory)

    n_frames = len(trajectory)
    if n_frames == 0:
        # Without any frame crysol is never run and there is nothing to
        # return; fail with a clear message instead of a NameError below.
        raise ValueError('trajectory contains no frames')

    # crysol writes its output files into the current working directory
    os.chdir(TEMPDIR)
    scattering_pred = None
    q_values = None

    # These do not depend on the frame, so build them once.
    pdbfn = '%s/tmp4crysol.pdb' % TEMPDIR
    intensities_output = 'tmp4crysol00.int'
    args = ['/%s %s' % kv for kv in crysol_params.items()]
    cmd = ['crysol', pdbfn] + args

    # Index-based access on purpose: frames are obtained via trajectory[i].
    for i in range(n_frames):

        frame = trajectory[i]
        frame.save_to_pdb(pdbfn)

        # run crysol command line
        print(cmd)
        subprocess.check_call(' '.join(cmd), shell=True,
                              stdout=DEVNULL, stderr=DEVNULL)

        # parse the output
        if not os.path.exists(intensities_output):
            raise IOError('crysol output not found')

        d = np.genfromtxt(intensities_output, skip_header=1)
        q_values = d[:, 0]

        # initialize output space on the first frame, once the number of
        # q-values is known. `is None` is required here: comparing an
        # ndarray with `== None` is elementwise on modern NumPy.
        if scattering_pred is None:
            scattering_pred = np.zeros((n_frames, d.shape[0]))

        scattering_pred[i, :] = d[:, 3]

        # clean up the per-frame files so the next run starts fresh
        os.remove(pdbfn)
        os.remove(intensities_output)
        os.remove('tmp4crysol00.alm')
        os.remove('tmp4crysol00.log')

    if save_to:
        io.saveh(save_to, q_values=q_values, saxs=scattering_pred)
        print("Saved: %s" % save_to)
        return
    else:
        return q_values, scattering_pred
コード例 #49
0
===============================================================================
This script is deprecated and will be removed in v2.7 
Please use CalculateProjectDistance.py
===============================================================================
"""
    parser = arglib.ArgumentParser(description="""
Calculate the RMSD between an input PDB and all conformations in your project.
Output as a HDF5 file (load using msmbuilder.io.loadh())
""" + deprecationmessage)
    warnings.warn(deprecationmessage, DeprecationWarning)
    
    parser.add_argument('pdb')
    parser.add_argument('atom_indices', help='Indices of atoms to compare',
        default='AtomIndices.dat')
    parser.add_argument('output', help='''Output file name. Output is an
        .h5 file with RMSD entries corresponding to the Assignments.h5 file.''',
        default='Data/RMSD.h5')
    parser.add_argument('project')
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = Trajectory.load_trajectory_file( args.pdb )
    atom_indices = np.loadtxt( args.atom_indices ).astype(int)

    distances = run(project, pdb, atom_indices)
    
    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
コード例 #50
0
ファイル: fahproject.py プロジェクト: jimsnyderjr/msmbuilder
    def write_trajectory(self,
                         clone_dir,
                         output_dir,
                         trajectory_number,
                         stride,
                         max_rmsd,
                         min_gens,
                         center_conformations,
                         memory_check,
                         omp_parallel_rmsd=True):
        """
        This function takes in a path to a CLONE and merges all the XTC files
        it finds into a LH5 trajectory:

        Parameters
        ----------
        clone_dir : str
            the directory in which the xtc files are found. All of the xtc files
            in this directory are joined together to make a single trajectory
            (.lh5) output file

        output_dir : str
            directory where the outputted files will be placed

        trajectory_number : int
            A unique number for this trajectory. This number is used in
            constructing the filename to write the outputted .lh5 trajectory to,
            and thus must be unique

        stride : int
            Subsample by only considering every Nth snapshot.

        max_rmsd : {float, None}
            if this value is not None, calculate the RMSD from the structure
            in `self.pdb_topology` to each snapshot and reject trajectories
            which have snapshots with RMSD greater than max_rmsd. If None, no
            check is performed

        min_gens : int
            Trajectories with `min_gens` or fewer XTC files left to convert
            are skipped.

        center_conformations : bool
            center conformations before saving.

        memory_check : bool
            if yes, uses the memory dictionary to do an update rather than a
            complete re-convert.

        omp_parallel_rmsd : bool
            If true, use OpenMP accelerated RMSD calculation for max_rmsd check

        Returns
        -------
        None. On success the trajectory is written (or an existing file is
        extended) and `self.memory[clone_dir]` is set to
        `[output_file_path, num_xtcs_processed]`. On every skip/failure path
        the method logs the reason and returns without touching the memory.
        """

        xtc_files = self.list_xtcs_in_dir(clone_dir)

        # Ensure that we're only joining contiguously numbered xtc files -- starting at 0 --
        # into a trajectory. If there are gaps in the xtc files in the directory, we only
        # want to use the ones such that they are contiguously numbered
        for i, filename in enumerate(xtc_files):
            if self.integer_component(filename) != i:
                logger.error(
                    "Found discontinuity in xtc numbering - check data in %s",
                    clone_dir)
                xtc_files = xtc_files[0:i]
                break

        # check the memory object to see which xtc files have already been converted, and
        # exclude those from this conversion
        previous_convert_exists = bool(memory_check
                                       and clone_dir in self.memory)
        if previous_convert_exists:
            num_xtcs_converted = self.memory[clone_dir][1]
            if len(xtc_files) == num_xtcs_converted:
                # if we have converted everything, just bail out
                logger.info("Already converted all files in %s, skipping...",
                            clone_dir)
                return
            xtc_files = xtc_files[num_xtcs_converted:]

        xtc_file_paths = [os.path.join(clone_dir, f) for f in xtc_files]

        logger.info("Processing %d xtc files in clone_dir = %s",
                    len(xtc_files), clone_dir)

        if len(xtc_files) <= min_gens:
            logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
            logger.info("Too few xtc files (generations).")
            return

        try:
            # [this should check for and discard overlapping snapshots]
            trajectory = Trajectory.load_from_xtc(
                xtc_file_paths,
                PDBFilename=self.pdb_topology,
                discard_overlapping_frames=True)
        except IOError as e:
            logger.error(
                "IOError (%s) when processing trajectory in clone_dir = %s", e,
                clone_dir)
            logger.error(
                "Attempting rescue by disregarding final frame, which is often"
            )
            logger.error("the first/only frame to be corrupted")

            if len(xtc_file_paths) == 1:
                logger.error(
                    "Didn't find any other frames in %s, continuing...",
                    clone_dir)
                return

            rescued_paths = xtc_file_paths[:-1]
            try:
                # Same options as the primary load so that a rescued
                # trajectory is built consistently with a regular one.
                trajectory = Trajectory.load_from_xtc(
                    rescued_paths,
                    PDBFilename=self.pdb_topology,
                    discard_overlapping_frames=True)
            except IOError:
                logger.error(
                    "Unfortunately, the error remained even after ignoring the final frame."
                )
                logger.error("Skipping the trajectory in clone_dir = %s",
                             clone_dir)
                return
            else:
                logger.error(
                    "Sucessfully recovered from IOError by disregarding final frame."
                )
                # The final file was NOT converted; drop it from the list so
                # the count stored in self.memory below does not mark it as
                # done and a later run can retry it.
                xtc_file_paths = rescued_paths

        if max_rmsd is not None:
            atomindices = [int(i) - 1 for i in trajectory['AtomID']]
            rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
            ppdb = rmsdmetric.prepare_trajectory(
                Trajectory.load_trajectory_file(self.pdb_topology))
            ptraj = rmsdmetric.prepare_trajectory(trajectory)
            rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

            worst_rmsd = max(rmsds)
            if worst_rmsd > max_rmsd:
                logger.warning("Snapshot %d RMSD %f > the %f cutoff",
                               argmax(rmsds), worst_rmsd, max_rmsd)
                logger.warning("Dropping trajectory")
                return

        if center_conformations:
            RMSD.TheoData.centerConformations(trajectory["XYZList"])

        # if we are adding to a previous trajectory, we have to load that traj up and extend it
        if previous_convert_exists:
            output_filename = self.memory[clone_dir][0]
            output_file_path = output_filename
            logger.info("Extending: %s", output_filename)
            assert os.path.exists(output_filename)

            # load the traj and extend it [this should check for and discard overlapping snapshots]
            Trajectory.append_frames_to_file(output_filename,
                                             trajectory['XYZList'][::stride],
                                             discard_overlapping_frames=True)

            num_xtcs_processed = len(
                xtc_file_paths) + self.memory[clone_dir][1]

        # if we are not adding to a traj, then we create a new one
        else:
            output_filename = 'trj%s.lh5' % trajectory_number
            output_file_path = os.path.join(output_dir, output_filename)

            if os.path.exists(output_file_path):
                logger.info("The file name %s already exists. Skipping it.",
                            output_file_path)
                return

            # stride and discard by snapshot
            trajectory['XYZList'] = trajectory['XYZList'][::stride]
            trajectory.save(output_file_path)

            num_xtcs_processed = len(xtc_file_paths)

        # log what we did into the memory object
        self.memory[clone_dir] = [output_file_path, num_xtcs_processed]

        return