def test_convert_chunked_project_1():
    """Run ``msmb ConvertChunkedProject`` on the alanine dipeptide data.

    Asserts that the command writes exactly ``traj-00000000.dcd`` and
    ``trajectories.jsonl`` into ``out``, that the written trajectory has as
    many frames as all input ``*.dcd`` files together, and that the JSON
    record names the output file and lists the sorted inputs as chunks.
    """

    def n_frames(path):
        # Close every handle as soon as its length is known instead of
        # leaving it to the garbage collector.
        with md.open(path) as fh:
            return len(fh)

    fetch_alanine_dipeptide()
    with tempdir():
        root = os.path.join(get_data_home(), 'alanine_dipeptide')
        # NOTE(review): the pattern is presumably quoted off Windows so that
        # the shell hands the glob to msmb unexpanded -- confirm.
        if sys.platform == 'win32':
            pattern = "*.dcd"
        else:
            pattern = "'*.dcd'"
        cmd = ('msmb ConvertChunkedProject out {root} --pattern {pattern} '
               '-t {root}/ala2.pdb'
               .format(root=root, pattern=pattern))
        shell(cmd)
        assert set(os.listdir('out')) == {'traj-00000000.dcd',
                                          'trajectories.jsonl'}

        chunks = glob.glob('%s/*.dcd' % root)

        # check that out/traj-00000.dcd really has concatenated all of
        # the input trajs
        length = n_frames('out/traj-00000000.dcd')
        assert length == sum(n_frames(f) for f in chunks)

        with open('out/trajectories.jsonl') as f:
            record = json.load(f)
        assert set(record.keys()) == {'filename', 'chunks'}
        assert record['filename'] == 'traj-00000000.dcd'
        assert sorted(chunks) == record['chunks']
def test_open_and_load(get_fn):
    """Smoke-test ``md.open`` and ``md.load`` on three fixture files.

    Nothing is asserted; the test passes when neither call raises.
    """
    # These aren't tested in test_length because they don't support length!
    files = ['frame0.mdcrd', '4waters.arc', 'frame0.lammpstrj']

    for fname in files:
        path = get_fn(fname)
        # Only the .mdcrd file is opened with an explicit atom count.
        open_kwargs = {'n_atoms': 22} if fname.endswith('.mdcrd') else {}
        # The context manager closes the handle again; the original left
        # it open for the rest of the test run.
        with md.open(path, **open_kwargs):
            md.load(path, top=get_fn('native.pdb'))
def test_lengths():
    """Compare ``gmx trjcat`` output with trajectories joined via mdtraj.

    Three per-generation ``.xtc`` files are concatenated by ``gmx trjcat``
    into ``catty.xtc``.  The strided result must have the expected number
    of frames and the same coordinates as (a) a manual concatenation that
    drops the last strided frame of all but the final file and (b)
    ``mdtraj.load`` of the file list with ``discard_overlapping_frames``.
    """
    n_gens = 3
    gen_files = ['PROJ9761/RUN3/CLONE9/frame{}.xtc'.format(k)
                 for k in range(n_gens)]
    stride = 8

    trjcat_cmd = ['gmx', 'trjcat', '-f'] + gen_files + ['-o', 'catty.xtc']
    subprocess.check_call(trjcat_cmd,
                          stderr=subprocess.STDOUT,
                          stdout=subprocess.DEVNULL)

    with mdtraj.open("catty.xtc") as xtc:
        stridelen, remain = divmod(len(xtc), stride)
    assert stridelen == n_gens * PROJ61_LENGTH_PER_GEN, (stridelen, remain)

    top = mdtraj.load_prmtop("tops-p9712/4bw5.prmtop")
    from_gmx = mdtraj.load("catty.xtc", top=top)[::stride]

    # blarg! the last frame is duplicatey
    by_hand = mdtraj.load(gen_files[0], top=top)[::stride][:-1]
    by_hand += mdtraj.load(gen_files[1], top=top)[::stride][:-1]
    by_hand += mdtraj.load(gen_files[2], top=top)[::stride]

    from_list = mdtraj.load(gen_files, top=top,
                            discard_overlapping_frames=True)[::stride]

    np.testing.assert_array_equal(from_gmx.xyz, from_list.xyz)
    np.testing.assert_array_equal(from_gmx.xyz, by_hand.xyz)
def run(traj_dir, conf_filename, project_filename, iext):
    """Rebuild a project file from the trajectories found in ``traj_dir``.

    Parameters
    ----------
    traj_dir : str
        Directory that is globbed for ``trj*<iext>`` files.
    conf_filename : str
        Conformation file stored in the project record; it must exist.
    project_filename : str
        Path the rebuilt project is saved to.
    iext : str
        Suffix appended to the ``trj*`` glob pattern.

    Raises
    ------
    IOError
        If ``conf_filename`` does not exist.
    """
    logger.info("Rebuilding project.")
    file_list = glob.glob(traj_dir + "/trj*%s" % iext)
    num_traj = len(file_list)
    traj_lengths = np.zeros(num_traj, 'int')
    traj_paths = []

    if not os.path.exists(conf_filename):
        raise IOError("Cannot find conformation file %s" % conf_filename)

    file_list = sorted(file_list, key=utils.keynat)
    for i, filename in enumerate(file_list):
        # Close each trajectory once its length is known; the original
        # leaked one open handle per file.
        with md.open(filename) as traj_file:
            traj_lengths[i] = len(traj_file)
        traj_paths.append(filename)

    records = {
        "conf_filename": conf_filename,
        "traj_lengths": traj_lengths,
        "traj_paths": traj_paths,
        # Built without ``xrange`` so the block runs on Python 2 and 3.
        "traj_errors": [None] * num_traj,
        "traj_converted_from": [[] for _ in range(num_traj)],
    }

    p = Project(records)
    p.save(project_filename)
    logger.info("Wrote %s" % project_filename)
def delete_trajectory_if_broken(filename, verbose=True):
    """
    Check the integrity of an MDTraj trajectory, deleting it if it is broken.

    A file counts as broken when opening it raises any exception.  Files
    that do not exist are ignored.

    Parameters
    ----------
    filename : str
        The trajectory filename.
    verbose : bool, optional, default=True
        If True, write some logging messages if broken trajectories are detected.

    """
    if not os.path.exists(filename):
        return

    try:
        trj = md.open(filename)
    except Exception as e:
        msg = "The integrity of trajectory file '%s' was compromised; deleting so that it will be regenerated.\n" % filename
        msg += "\n"
        msg += str(e)
        if verbose:
            print(msg)
        # ``os.path`` has no ``unlink``; the function lives in ``os``.
        os.unlink(filename)
    else:
        # Clean up.  Only reached when the open succeeded, so ``trj`` is
        # always bound here (the original ``del trj`` ran unconditionally
        # and failed after a broken file had been handled).
        trj.close()
def write(self, filename, coordinates=None):
    '''
    Save coordinates to a trajectory file whose format follows from the
    extension of the given filename.

    Arguments:
        filename:
            Name of the file to be written. All MDTraj-supported formats
            are available.
        coordinates:
            An [nframes, natoms, 3] numpy array defining what will be
            written; when omitted, all frames in the Cofasu are output.
    '''
    # Note: currently ignores box data.
    suffix = os.path.splitext(filename)[1].lower()

    if coordinates is None:
        coordinates = self.x.compute()

    with mdt.open(filename, 'w') as out:
        # The .gro and .pdb writers are additionally handed the topology.
        if suffix in ('.gro', '.pdb'):
            out.write(coordinates, self.top)
        else:
            out.write(coordinates)
def parse_fn(self, fn):
    """Build the metadata record for one trajectory filename.

    The record holds the trajectory and topology filenames, the frame
    count when the file can be opened (a ``ParseWarning`` is issued
    otherwise), ``step_ps`` when it is set on the parser, and one entry
    per name in ``self.group_names`` taken from the matching group of
    ``self.fn_re`` and passed through the corresponding transform.

    Raises
    ------
    ValueError
        If ``fn`` does not match ``self.fn_re``.
    """
    meta = dict(
        traj_fn=fn,
        top_fn=self.top_fn,
        top_abs_fn=os.path.abspath(self.top_fn),
    )

    # Best effort: an unreadable file only costs the 'nframes' entry.
    try:
        with md.open(fn) as f:
            meta['nframes'] = len(f)
    except Exception as e:
        warnings.warn("Could not determine the number of frames for {}: {}"
                      .format(fn, e), ParseWarning)

    if self.step_ps is not None:
        meta['step_ps'] = self.step_ps

    # Get indices
    ma = self.fn_re.search(fn)
    if ma is None:
        raise ValueError("Filename {} did not match the "
                         "regular rexpression {}".format(fn, self.fn_re))

    # Regex groups are numbered from 1, in the order of ``group_names``.
    named_transforms = zip(self.group_names, self.group_transforms)
    for group_index, (name, transform) in enumerate(named_transforms, 1):
        meta[name] = transform(ma.group(group_index))
    return meta
def _get_traj_info(self, filename):
    """Open ``filename`` read-only and summarise it as a ``TrajInfo``.

    The summary consists of the size of the second axis of the first
    array returned by ``read(1)``, the length of the file object, and its
    ``offsets`` attribute (an empty list when the object has none).
    """
    with mdtraj.open(filename, mode='r') as handle:
        n_frames = len(handle)
        first = handle.read(1)[0]
        ndim = np.shape(first)[1]
        if hasattr(handle, 'offsets'):
            offsets = handle.offsets
        else:
            offsets = []

    return TrajInfo(ndim, n_frames, offsets)
def _eval_traj_shapes(self):
    """Collect the frame count and atom count of every trajectory.

    Returns
    -------
    lengths : np.ndarray
        ``len()`` of each opened trajectory file, indexed by trajectory.
    n_atoms : np.ndarray
        ``n_atoms`` of frame 0 of each trajectory, indexed by trajectory.
    """
    n_trajs = self.n_trajs
    lengths, n_atoms = np.zeros(n_trajs), np.zeros(n_trajs)

    for k in xrange(n_trajs):
        path = self.traj_filename(k)
        with md.open(path) as handle:
            lengths[k] = len(handle)
            first_frame = md.load_frame(path, 0)
            n_atoms[k] = first_frame.n_atoms

    return lengths, n_atoms
def load_partial(netcdf, prmtop, start, stop, stride=1):
    """Read part of a trajectory file as an mdtraj trajectory.

    The topology is loaded from ``prmtop``; the file is positioned at
    ``start`` and then read with ``n_frames=int((stop-start)/stride)`` and
    the given ``stride``.
    """
    topology = md.load_topology(prmtop)

    with md.open(netcdf) as handle:
        handle.seek(start)
        frame_count = int((stop-start)/stride)
        return handle.read_as_traj(topology, n_frames=frame_count,
                                   stride=stride)
def test_length(get_fn):
    """``len()`` of an opened trajectory file equals that of the loaded one.

    ``frame0.lh5`` is only included when not running on Windows with
    Python 3.
    """
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', '2EQQ.pdb', 'frame0.binpos', 'frame0.xyz',
             'frame0.tng']
    if not (on_win and on_py3):
        files.append('frame0.lh5')

    for fname in files:
        # The context manager closes each file again; the original kept
        # every handle open until garbage collection.
        with md.open(get_fn(fname)) as opened:
            loaded = md.load(get_fn(fname), top=get_fn('native.pdb'))
            assert len(opened) == len(loaded)
def test_cnv():
    """Process project ``p1234`` and compare cnv1 and cnv2 outputs.

    For every generation both output files must exist, the cnv1
    coordinates must have shape ``(11, 22, 3)``, and the cnv1 coordinates
    without their last frame, multiplied by 10, must equal the cnv2
    coordinates.
    """
    proj = Processor("p1234", "data/PROJ1234", 'xa4')
    trajectories = [Trajectory(info, proj, None) for info in proj.get_infos()]
    cnv2_infos = [_process_trajectory(traj) for traj in trajectories]
    assert len(cnv2_infos) > 0

    for info in cnv2_infos:
        for g, _ in enumerate(info['cnv2']['gens']):
            cnv1_path = info['cnv1']['gens'][g]
            cnv2_path = info['cnv2']['gens'][g]
            assert os.path.exists(cnv1_path)
            assert os.path.exists(cnv2_path)

            with mdtraj.open(cnv1_path) as tfile, \
                    mdtraj.open(cnv2_path) as dcdfile:
                xyz, time, step, box = tfile.read()
                print("Shape", xyz.shape)
                assert xyz.shape == (11, 22, 3), xyz.shape

                xyz_nc, time, lengths, angles = dcdfile.read()
                np.testing.assert_array_equal(xyz[:-1] * 10, xyz_nc)
def get_num_frames(trj_file, stride):
    """Return the number of frames obtained when reading with ``stride``.

    The whole file is read with the given stride and the size of the
    first axis of the first returned array is reported.

    Parameters
    ----------
    trj_file : str
        Path of the trajectory file.
    stride : int
        Stride passed to the file object's ``read``.
    """
    # The context manager closes the file deterministically; the original
    # relied on ``del`` followed by ``gc.collect()`` to release it.
    with md.open(trj_file, mode="r", force_overwrite=False) as t:
        res = t.read(stride=stride)
    return np.shape(res[0])[0]
def testCacheResults(self):
    """Values looked up in ``self.db`` equal the lengths read from disk."""
    # cause cache failures
    results = {f: self.db[f] for f in xtcfiles}

    desired = {}
    for path in xtcfiles:
        with mdtraj.open(path) as fh:
            desired[path] = len(fh)

    self.assertEqual(results, desired)
def test_convert_chunked_project_1():
    """Run ``msmb ConvertChunkedProject`` on the alanine dipeptide data.

    Requires the data set to be present under ``get_data_home()``.
    Asserts that exactly ``traj-00000000.dcd`` and ``trajectories.jsonl``
    are written to ``out``, that the written trajectory has as many frames
    as all input ``*.dcd`` files together, and that the JSON record names
    the output file and lists the sorted inputs as chunks.
    """

    def n_frames(path):
        # Close every handle as soon as its length is known instead of
        # leaving it to the garbage collector.
        with md.open(path) as fh:
            return len(fh)

    with tempdir():
        root = os.path.join(get_data_home(), 'alanine_dipeptide')
        assert os.path.exists(root)
        cmd = ("msmb ConvertChunkedProject out {root} --pattern *.dcd "
               "-t {root}/ala2.pdb".format(root=root))
        shell(cmd)
        assert set(
            os.listdir('out')) == {'traj-00000000.dcd', 'trajectories.jsonl'}

        chunks = glob.glob('%s/*.dcd' % root)

        # check that out/traj-00000.dcd really has concatenated all of
        # the input trajs
        length = n_frames('out/traj-00000000.dcd')
        assert length == sum(n_frames(f) for f in chunks)

        with open('out/trajectories.jsonl') as f:
            record = json.load(f)
        assert set(record.keys()) == {'filename', 'chunks'}
        assert record['filename'] == 'traj-00000000.dcd'
        assert sorted(chunks) == record['chunks']
def test_cnv_bw():
    """Process project ``v1`` and check the first cnv2 generation.

    The first cnv2 output of every trajectory must exist, live in an
    output directory containing ``/cnv2``, and hold coordinates of shape
    ``(11, 22, 3)``.
    """
    proj = Processor("v1", "data/v1", 'bw')
    trajectories = [Trajectory(info, proj, None) for info in proj.get_infos()]
    cnv2_infos = [_process_trajectory(traj) for traj in trajectories]
    assert len(cnv2_infos) > 0

    for info in cnv2_infos:
        first_gen = info['cnv2']['gens'][0]
        assert os.path.exists(first_gen)
        assert "/cnv2" in info['cnv2']['outdir']

        with mdtraj.open(info['cnv2']['gens'][0]) as tfile:
            xyz, time, step, box = tfile.read()
            print("Shape", xyz.shape)
            assert xyz.shape == (11, 22, 3), xyz.shape
def __set_dimensions_and_lengths(self):
    """Store the trajectory count and append every trajectory's length.

    Lengths come from ``TrajectoryInfoCache`` when the
    ``use_trajectory_lengths_cache`` config entry equals the string
    ``'True'``; otherwise each file is opened and measured directly.

    Raises
    ------
    ValueError
        If there are no trajectory files.
    """
    files = self.trajfiles
    self._ntraj = len(files)
    use_cache = config['use_trajectory_lengths_cache'] == 'True'

    # lookups pre-computed lengths, or compute it on the fly and store it in db.
    if not use_cache:
        for path in files:
            with mdtraj.open(path, mode='r') as fh:
                self._lengths.append(len(fh))
    else:
        from pyemma.coordinates.data.traj_info_cache import TrajectoryInfoCache
        for path in files:
            self._lengths.append(TrajectoryInfoCache[path])

    # number of trajectories/data sets
    if self._ntraj == 0:
        raise ValueError("no valid data")
def _get_traj_info(self, filename):
    """Open ``filename`` read-only and summarise it as a ``TrajInfo``.

    When the file object does not support ``len()``, the remainder of the
    file is read after the first frame and ``tell()`` supplies the length.
    ``offsets`` falls back to an empty tuple when the file object has no
    such attribute.
    """
    with mdtraj.open(filename, mode='r') as fh:
        try:
            length = len(fh)
        except (NotImplementedError, TypeError):
            # certain formats like txt based ones (.gro, .lammpstrj) do
            # not implement len(); resolved after the first frame is read
            length = None

        first = fh.read(1)[0]
        ndim = np.shape(first)[1]

        if length is None:
            # consume the rest of the file so tell() reports the total
            fh.read()
            length = fh.tell()

        if hasattr(fh, 'offsets'):
            offsets = fh.offsets
        else:
            offsets = ()

    return TrajInfo(ndim, length, offsets)
def test_compare_energies_acetylacetone_enol_waterbox(caplog):
    """Compare CHARMM and openMM snapshot energies (enol, waterbox).

    For every output directory the trajectory is evaluated once through
    the CHARMM path and once through the openMM path.  Both must yield the
    same number of energies, a mean absolute difference below 1.0, and
    per-snapshot agreement within a relative tolerance of 1e-2.
    """
    caplog.set_level(logging.WARNING)
    from transformato import FreeEnergyCalculator
    import mdtraj as md

    env = "waterbox"
    base = "data/acetylacetone-keto-acetylacetone-enol-rsfe/acetylacetone-enol/"
    (
        output_files_enol,
        output_files_keto,
    ) = get_output_files_acetylaceton_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-acetylacetone-tautomer-rsfe.yaml"

    configuration = load_config_yaml(
        config=conf, input_dir="data/", output_dir="data"
    )  # NOTE: for preprocessing input_dir is the output dir

    f = FreeEnergyCalculator(configuration, "acetylacetone-enol")
    for idx, b in enumerate(output_files_enol):
        traj = md.load_dcd(
            f"{b}/lig_in_{env}.dcd",
            f"{b}/lig_in_{env}.psf",
        )
        # used load_dcd for CHARMM
        traj.save_dcd(f"{base}/traj.dcd", force_overwrite=True)
        l_charmm = f._evaluate_e_on_all_snapshots_CHARMM(traj, idx + 1, env)

        # load dcd with openMM
        # The raw file handle is closed right after reading; the original
        # never closed it.
        with md.open(f"{b}/lig_in_{env}.dcd") as dcd_file:
            xyz, unitcell_lengths, _ = dcd_file.read()
        xyz = xyz / 10  # correct the conversion
        unitcell_lengths = unitcell_lengths / 10
        l_openMM = f._evaluate_e_on_all_snapshots_openMM(
            xyz, unitcell_lengths, idx + 1, env
        )

        assert len(l_charmm) == len(l_openMM)
        s = abs(np.array(l_charmm) - np.array(l_openMM))
        mae = np.sum(s) / len(s)
        print(mae)
        assert mae < 1.0

        for e_charmm, e_openMM in zip(l_charmm, l_openMM):
            assert np.isclose(e_charmm, e_openMM, rtol=1e-2)
def test_featurereader_xtc(self):
    """``self.db`` entries for a FeatureReader match values read from disk.

    For every xtc file the ``(ndim, length, offsets)`` triple obtained
    through ``self.db[f, reader]`` is compared with the same triple
    computed from the opened file.
    """
    # cause cache failures
    with settings(use_trajectory_lengths_cache=False):
        reader = FeatureReader(xtcfiles, pdbfile)

    results = {}
    for path in xtcfiles:
        info = self.db[path, reader]
        results[path] = (info.ndim, info.length, info.offsets)

    expected = {}
    for path in xtcfiles:
        with mdtraj.open(path) as fh:
            n_frames = len(fh)
            n_dim = fh.read(1)[0].shape[1]
            if hasattr(fh, 'offsets'):
                file_offsets = fh.offsets
            else:
                file_offsets = []
            expected[path] = (n_dim, n_frames, file_offsets)

    np.testing.assert_equal(results, expected)
def test_length(get_fn):
    """``len()`` of an opened trajectory file equals that of the loaded one.

    Files whose extension is listed in
    ``mdtraj.core.trajectory._TOPOLOGY_EXTS`` serve as their own topology;
    all others are loaded with ``native.pdb``.  ``frame0.lh5`` is only
    included when not running on Windows with Python 3.
    """
    files = [
        'frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr', 'frame0.dcd',
        '2EQQ.pdb', 'frame0.binpos', 'frame0.xyz', 'frame0.tng'
    ]
    if not (on_win and on_py3):
        files.append('frame0.lh5')

    for fname in files:
        extension = '.' + fname.rsplit('.', 1)[-1]
        if extension in mdtraj.core.trajectory._TOPOLOGY_EXTS:
            top = fname
        else:
            top = 'native.pdb'

        # The context manager closes each file again; the original kept
        # every handle open until garbage collection.
        with md.open(get_fn(fname)) as opened:
            loaded = md.load(get_fn(fname), top=get_fn(top))
            assert len(opened) == len(loaded)
def save(self, *args, **kw):
    """Save the trajectory record, validating newly uploaded data first.

    For an object without a primary key the frame count of the uploaded
    file is added to ``self.length`` and the SHA-512 digest of the data is
    compared with ``self.hash_sha512``.

    Raises
    ------
    ParseError
        If the computed digest differs from the promised one.
    """
    is_new_file = not self.pk
    if is_new_file:
        # get length
        with _symlink_workaround_temp_uploaded_file(self.data) as f, \
                mdtraj.open(f) as traj:
            self.length += len(traj)

        # validate hash
        import hashlib
        digest = hashlib.sha512()
        for chunk in self.data.chunks():
            digest.update(chunk)
        computed_hash = digest.hexdigest()

        if computed_hash != self.hash_sha512:
            raise ParseError(["Uploaded trajectory has different hash value than promised ",
                              {"promised": self.hash_sha512, "received": computed_hash}])

    super(Trajectory, self).save(*args, **kw)
def test_compare_energies_2OJ9_original_vacuum(caplog):
    """Compare CHARMM and openMM snapshot energies (2OJ9-original, vacuum).

    For every output directory the trajectory is evaluated once through
    the CHARMM path and once through the openMM path.  Both must yield the
    same number of energies, per-snapshot agreement within a relative
    tolerance of 0.2, and a mean absolute difference below 0.005.
    """
    caplog.set_level(logging.WARNING)
    from transformato import FreeEnergyCalculator
    import mdtraj as md

    env = "vacuum"

    base = f"{get_testsystems_dir()}/2OJ9-original-2OJ9-tautomer-rsfe/2OJ9-original/"
    output_files_t1, _ = get_output_files_2oj9_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-2oj9-tautomer-pair-rsfe.yaml"

    configuration = load_config_yaml(
        config=conf, input_dir=get_testsystems_dir(), output_dir=get_testsystems_dir()
    )  # NOTE: for preprocessing input_dir is the output dir

    f = FreeEnergyCalculator(configuration, "2OJ9-original")
    for idx, b in enumerate(output_files_t1):
        # used load_dcd for CHARMM
        traj = md.load_dcd(
            f"{b}/lig_in_{env}.dcd",
            f"{b}/lig_in_{env}.psf",
        )
        traj.save_dcd(f"{base}/traj.dcd")
        l_charmm = f._evaluate_e_on_all_snapshots_CHARMM(traj, idx + 1, env)

        # load dcd with openMM
        # The raw file handle is closed right after reading; the original
        # never closed it.
        with md.open(f"{b}/lig_in_{env}.dcd") as dcd_file:
            xyz, unitcell_lengths, _ = dcd_file.read()
        xyz = xyz / 10  # correct the conversion
        l_openMM = f._evaluate_e_on_all_snapshots_openMM(
            xyz, unitcell_lengths, idx + 1, env
        )

        assert len(l_charmm) == len(l_openMM)
        s = abs(np.array(l_charmm) - np.array(l_openMM))
        print(s)
        for e_charmm, e_openMM in zip(l_charmm, l_openMM):
            assert np.isclose(e_charmm, e_openMM, rtol=0.2)
        mae = np.sum(s) / len(s)
        assert mae < 0.005
def _get_traj_info(self, filename):
    """Open ``filename`` read-only and summarise it as a ``TrajInfo``.

    While the file is inspected, ``XYZTrajectoryFile.__len__`` is patched
    with a replacement that counts frames by iterating over the whole file
    with ``mdtraj.iterload`` and issues an ``EfficiencyWarning``.
    """
    # workaround NotImplementedError __len__ for xyz files
    # Github issue: markovmodel/pyemma#621
    if six.PY2:
        from mock import patch
    else:
        from unittest.mock import patch
    from mdtraj.formats import XYZTrajectoryFile

    def _make_len_func(top):
        # Binds the topology needed by iterload into the replacement.
        def _len_xyz(self):
            assert isinstance(self, XYZTrajectoryFile)
            assert hasattr(
                self, '_filename'), "structual change in xyzfile class!"
            import warnings
            from pyemma.util.exceptions import EfficiencyWarning

            # The cache hint is appended separately.  In the original a
            # single conditional expression covered the whole
            # concatenation, so the warning text was empty whenever the
            # cache was disabled.
            message = ("reading all of your data,"
                       " just to determine number of frames.")
            if config['use_trajectory_lengths_cache']:
                message += " Happens only once, because this is cached."
            warnings.warn(message, EfficiencyWarning)

            # obtain len by reading whole file!
            mditer = mdtraj.iterload(self._filename, top=top)
            return sum(t.n_frames for t in mditer)
        return _len_xyz

    f = _make_len_func(self.topfile)

    # lookups pre-computed lengths, or compute it on the fly and store it in db.
    with patch.object(XYZTrajectoryFile, '__len__', f):
        with mdtraj.open(filename, mode='r') as fh:
            length = len(fh)
            frame = fh.read(1)[0]
            ndim = np.shape(frame)[1]
            offsets = fh.offsets if hasattr(fh, 'offsets') else []

    return TrajInfo(ndim, length, offsets)
def test_lazy_eval():
    """Seek to frame 10 of the waterbox DCD and read the remainder.

    The read must return 190 frames of 2530 atoms together with 190
    unit-cell length entries.
    """
    import mdtraj as md

    base_path = f"{get_testsystems_dir()}/2OJ9-original-2OJ9-tautomer-rsfe/2OJ9-original/intst1/"
    dcd_path = f"{base_path}/lig_in_waterbox.dcd"
    psf_file = f"{base_path}/lig_in_waterbox.psf"

    # Loaded in full as well; the result is not inspected further.
    traj = md.load(dcd_path, top=psf_file)

    with md.open(dcd_path) as dcd:
        dcd.seek(10)
        xyz, unitcell_lengths, unitcell_angles = dcd.read()

    frames_after_seek = 190
    assert xyz.shape == (frames_after_seek, 2530, 3)
    assert len(xyz) == frames_after_seek
    print(unitcell_lengths[0])
    print(len(unitcell_lengths))
    assert len(unitcell_lengths) == frames_after_seek
    print(unitcell_angles[0])
def worker(out_q, _range):
    """Collect in-box fractional coordinates for the given frame indices.

    Every frame index in ``_range`` is read individually from
    ``args.traj``, applied to the grid object with either grid data loaded
    from ``args.grid_files`` (when ``external_grid`` is set) or an identity
    matrix and ``args.center``, and the result of
    ``get_inside_crds_frac()`` is collected.  The list of results is put
    on ``out_q`` once all frames are done.
    """

    def load_preferring(first_choice, fallback):
        # Use the first file when it exists, otherwise the fallback.
        chosen = first_choice if os.path.exists(first_choice) else fallback
        return np.loadtxt(chosen)

    grid = box_crds(args.topo, np.array(args.dim))
    collected = []

    with md.open(args.traj) as md_traj:
        for i in _range:
            md_traj.seek(i)
            frame = md_traj.read_as_traj(grid.topo, n_frames=1, stride=1)

            if not external_grid:
                grid.set_frame(frame, np.eye(3, 3), args.center)
            else:
                ### Unit is cell matrix vectors. It will be transformed
                ### into frac2real within the set_frame call. Therefore we
                ### MUST assume that cell matrix vectors are perfectly normalized.
                unit = load_preferring(
                    args.grid_files + "/%d_unit.dat.gz" % i,
                    args.grid_files + "/%d_unit.dat" % i)
                center = load_preferring(
                    args.grid_files + "/%d_center.dat.gz" % i,
                    args.grid_files + "/%d_center.dat" % i)
                grid.set_frame(frame, unit, center)

            collected.append(grid.get_inside_crds_frac())

    out_q.put(collected)
def test_reading_of_coords():
    """Coordinates from ``md.load_dcd`` match a raw ``md.open`` read.

    The first frame read through the raw file handle, divided by 10, must
    be close to the first frame of the loaded trajectory.
    """
    env = "vacuum"
    output_files_t1, _ = get_output_files_2oj9_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-2oj9-tautomer-pair-rsfe.yaml"

    configuration = load_config_yaml(
        config=conf, input_dir=get_testsystems_dir(), output_dir=get_test_output_dir()
    )  # NOTE: for preprocessing input_dir is the output dir

    b = output_files_t1[0]
    print(b)
    traj_load = md.load_dcd(
        f"{b}/lig_in_{env}.dcd",
        f"{b}/lig_in_{env}.psf",
    )
    print(traj_load.xyz[0])

    # The raw file handle is closed right after reading; the original
    # never closed it.
    with md.open(f"{b}/lig_in_{env}.dcd") as traj_open:
        xyz, unitcell_lengths, _ = traj_open.read()
    xyz = xyz / 10
    print(xyz[0])
    assert np.allclose(xyz[0], traj_load.xyz[0])
def estimateDG(topfile=None,crdfile=None,pertfile=None,
               trajfile=None,librarypath=None):
    """
    This subroutine loads a trajectory for an alchemical state, and a list of ligand molecules

    For each frame of the trajectory
        for each perturbed ligand
            Align the perturbed ligand onto reference ligand.
            Generate K poses
            Accumulate exp energy difference
    Estimate (w bootstrapping) free energy difference & uncertainties

    As written, only the energy differences are accumulated: frames 1 to 3
    (clipped to the trajectory length) are scanned, one aligned pose per
    ligand is evaluated, the function stops in the debugger and finally
    returns 0.

    Parameters
    ----------
    topfile, crdfile :
        Passed to ``Amber().readCrdTop(crdfile, topfile)``.
    pertfile :
        Wrapped in a ``Parameter`` and passed as ``morphfile`` to
        ``createSystemFreeEnergy``.
    trajfile :
        Trajectory opened with ``mdtraj.open``.
    librarypath :
        Passed to ``loadLibrary``.
    """
    print ("HELLO ESTIMATE DG")
    # Setup system describing alchemical state
    amber = Amber()
    (molecules, space) = amber.readCrdTop(crdfile, topfile)
    morphfile = Parameter("morphfile",pertfile,""".""")
    system = createSystemFreeEnergy(molecules, morphfile=morphfile)
    # Force-field settings are hard-coded here and wrapped in Parameter
    # objects before being handed to setupForceFieldsFreeEnergy.
    cutoff_type = Parameter(".","cutoffperiodic",""".""")
    cutoff_dist = Parameter(".",10*angstrom,""".""")
    rf_dielectric = Parameter(".",82.0,""".""")
    shift_delta = Parameter(".",2.0,""".""")
    coulomb_power = Parameter(".",0,""".""")
    combining_rules = Parameter(".","arithmetic",""".""")
    lambda_val = Parameter(".",0.0,""".""")
    system = setupForceFieldsFreeEnergy(system, space,
                                        cutoff_type=cutoff_type,
                                        cutoff_dist=cutoff_dist,
                                        rf_dielectric=rf_dielectric,
                                        shift_delta=shift_delta,
                                        coulomb_power=coulomb_power,
                                        combining_rules=combining_rules,
                                        lambda_val=lambda_val)
    # Load ligands library
    # FIX ME ! Don't include ligands that have already been simulated !
    library = loadLibrary(librarypath)
    library_deltaenergies = {}
    # library_deltaenergies contain the list of computed energy differences
    # NOTE(review): this dict is filled with empty lists but not referenced
    # again in this function; this loop also iterates single items while the
    # loops below unpack (ID, ligand) pairs from the same library -- confirm.
    for ligand in library:
        library_deltaenergies[ligand] = []
    #import pdb; pdb.set_trace()
    # Now scan trajectory
    start_frame = 1
    end_frame = 3
    step_frame = 1
    trajfile = Parameter(".",trajfile,""".""")
    # NOTE(review): this handle is never closed in this function.
    mdtraj_trajfile = mdtraj.open(trajfile.val,'r')
    nframes = len(mdtraj_trajfile)
    # Clip the scan range to the last available frame index.
    if end_frame > (nframes - 1):
        end_frame = nframes - 1
    mdtraj_trajfile.seek(start_frame)
    current_frame = start_frame

    # One list of energy differences per ligand ID.
    energies = {}
    for (ID, ligand) in library:
        energies[ID] = []

    while (current_frame <= end_frame):
        print ("#Processing frame %s " % current_frame)
        # Frames are read one after another from the current file position.
        frames_xyz, cell_lengths, cell_angles = mdtraj_trajfile.read(n_frames=1)
        system = updateSystemfromTraj(system, frames_xyz, cell_lengths, cell_angles)
        ref_ligand = system[MGName("solutes")].molecules().first().molecule()
        ref_nrg = system.energy()
        print (ref_nrg)
        for (ID, ligand) in library:
            # Align ligand onto reference ligand
            mapping = AtomMCSMatcher(1*second).match(ref_ligand, PropertyMap(), ligand, PropertyMap())
            mapper = AtomResultMatcher(mapping)
            # This does a RB alignment
            # TODO) Explore optimised alignment codes
            # For instance could construct aligned ligand by reusing MCSS coordinates
            # and completing topology for variable part using BAT internal coordinates
            # Also, better otherwise never get intramolecular energy variations !
            # Basic test...SAME LIGAND should give 0 energy difference !
            # FIXME) Return multiple coordinates and update system in each instance
            aligned_ligand = ligand.move().align(ref_ligand, AtomMatchInverter(mapper))
            #print (ref_ligand.property("coordinates").toVector())
            #print ("####")
            #print (aligned_ligand.property("coordinates").toVector())
            # FIXME) Optimise for speed
            # A fresh system is assembled for every ligand: the solvent group
            # of the current system plus solute groups holding the aligned
            # ligand (the two dummy groups stay empty).
            new_system = System()
            new_space = system.property("space")
            new_system.add( system[MGName("solvent")] )
            sols = MoleculeGroup("solutes")
            solref = MoleculeGroup("solute_ref")
            solhard = MoleculeGroup("solute_ref_hard")
            soltodummy = MoleculeGroup("solute_ref_todummy")
            solfromdummy = MoleculeGroup("solute_ref_fromdummy")
            sols.add(aligned_ligand)
            solref.add(aligned_ligand)
            solhard.add(aligned_ligand)
            new_system.add(sols)
            new_system.add(solref)
            new_system.add(solhard)
            new_system.add(soltodummy)
            new_system.add(solfromdummy)
            #print ("###")
            # DONE) Optimise for speed, only doing ligand energies
            #print (new_system[MGName("solutes")].first().molecule().property("coordinates").toVector())
            new_system = setupForceFieldsFreeEnergy(new_system, new_space,
                                                    cutoff_type=cutoff_type,
                                                    cutoff_dist=cutoff_dist,
                                                    rf_dielectric=rf_dielectric,
                                                    shift_delta=shift_delta,
                                                    coulomb_power=coulomb_power,
                                                    combining_rules=combining_rules,
                                                    lambda_val=lambda_val)
            new_nrg = new_system.energy()
            print (new_nrg)
            energies[ID].append( new_nrg - ref_nrg )
            # for each conformation generated
            # consider further optimisation (rapid MC --> if loaded flex files?)
            # update 'perturbed' group with aligned ligand coordinates
            # compute 'perturbed' energy
            # accumulate 'perturbed' - reference
        #import pdb; pdb.set_trace()
        current_frame += step_frame
    # NOTE(review): a debugger breakpoint is left active here; execution
    # stops for interactive input before the function returns.
    import pdb; pdb.set_trace()
    # Now convert accumulated data int
    return 0
def test_len(get_fn):
    """``len()`` on the xyz fixture reports 501 frames without advancing.

    After ``len()`` the frame index must still be 0 and a full read must
    return 501 frames.
    """
    expected_frames = 501
    with md.open(get_fn('frame0.xyz')) as xyz_file:
        assert len(xyz_file) == expected_frames
        assert xyz_file._frame_index == 0
        assert len(xyz_file.read()) == expected_frames
Sire.Stream.save((molecules, space), s3file.val) # What to do with this... system = createSystemFreeEnergy(molecules) lam = Symbol("lambda") solutes = system[MGName("solutes")] solute_ref = system[MGName("solute_ref")] system.setConstant(lam, lambda_val.val) system.add(PerturbationConstraint(solutes)) system.setComponent(lam, lambda_val.val) # Now loop over snapshots in dcd and accumulate energies start_frame = 1 end_frame = 1000000000 step_frame = stepframe.val mdtraj_trajfile = mdtraj.open(trajfile.val,'r') nframes = len(mdtraj_trajfile) if end_frame > (nframes - 1): end_frame = nframes - 1 mdtraj_trajfile.seek(start_frame) current_frame = start_frame #system = createSystemFreeEnergy(molecules) system_solute_rf = System() system_solute_rf.add(solutes) system_solute_rf.add(system[MGName("solute_ref")]) system_solute_rf.add(system[MGName("solute_ref_hard")]) system_solute_rf.add(system[MGName("solute_ref_todummy")]) system_solute_rf.add(system[MGName("solute_ref_fromdummy")])
def test_len():
    """``len()`` on the xyz fixture reports 501 frames without advancing.

    After ``len()`` the frame index must still be 0 and a full read must
    return 501 frames.
    """
    expected_frames = 501
    with md.open(get_fn('frame0.xyz')) as xyz_file:
        assert len(xyz_file) == expected_frames
        assert xyz_file._frame_index == 0
        assert len(xyz_file.read()) == expected_frames
def handle(self, *args, **options):
    """Write one flare-plot JSON file per trajectory and interaction type.

    For every selected dynamics entry the frame count of each trajectory
    file is read, the residue-label and contact tables are parsed from the
    precomputed get_contacts directory, and the results are written with
    ``json.dump`` below ``get_precomputed_file_path('flare_plot', ...)``.

    Options read: ``ignore_publication`` and ``dynamics_id``.
    Exceptions raised while processing one dynamics entry are written to
    stdout and the loop moves on to the next entry.
    """
    def create_pos_to_gnum(dyn_id):
        # Parse the labels table of this dynamics entry into a dict that
        # maps the first tab-separated column to the second.  Returns
        # False when the file does not exist.
        gcdata_path="/protwis/sites/files/Precomputed/get_contacts_files/dynamic_symlinks/dyn%(dynid)s/dyn%(dynid)s_labels.tsv" % {"dynid" :dyn_id}
        if os.path.isfile(gcdata_path):
            pos_to_gnum={}
            with open(gcdata_path) as infile:
                for line in infile:
                    # skip comment lines
                    if len(line) == 0 or line[0] == "#":
                        continue
                    (pos,gnum) = line.split("\t")
                    pos_to_gnum[pos]=gnum.rstrip("\n")
            return pos_to_gnum
        else:
            return False

    def extract_res_info(res1):
        # Look up a contact residue in ``pos_to_gnum`` (taken from the
        # enclosing ``handle`` scope, where it is assigned per dynamics
        # entry) and return (label, tree path, node color, helix key).
        # Returns False for unknown residues and for labels whose part
        # before "x" is "Ligand".
        helix_to_treep={"1" :"2", "12":"3", "2" :"4" , "23":"5" , "3" :"6" ,
                        "34":"7" , "4" :"8" , "45":"9" , "5" :"10" ,
                        "56":"11" , "6" :"12" , "67":"13" , "7" :"14" ,
                        "78":"15" , "8" :"16"}
        nodecolor = {'1': '#78C5D5', '12': '#5FB0BF', '2': '#459BA8',
                     '23': '#5FAF88', '3': '#79C268', '34': '#9FCD58',
                     '4': '#C5D747', '45': '#DDD742', '5': '#F5D63D',
                     '56': '#F3B138', '6': '#F18C32', '67': '#ED7A6A',
                     '7': '#E868A1', '78': '#D466A4', '8': '#BF63A6'}
        # drop everything from the last ":" onwards
        res1=res1[:res1.rfind(":")]
        if res1 in pos_to_gnum:
            gnum1=pos_to_gnum[res1]
            h1=gnum1.split("x")[0]
            if h1=="Ligand":
                return False
            treep1=helix_to_treep[h1] + "." + gnum1
            nodecolor1 = nodecolor[h1]
            return (gnum1,treep1,nodecolor1,h1)
        else:
            return False

    def create_p_jsons(dynfiles_traj,pos_to_gnum):
        # Parse the per-frame contact table of one dynamics entry, split
        # its global frame numbers over the trajectories in
        # ``dynfiles_traj`` and save one JSON document per trajectory and
        # interaction type.  Returns False when the table does not exist,
        # True otherwise.
        frame_ends_bytraj=[]
        accum_frames=0
        all_int_d=dict()
        hb_list=["hbbb","hbsb","hbss"]
        for traj in dynfiles_traj:
            n_frames=traj["n_frames"]
            # last global frame index that belongs to this trajectory
            frame_ends_bytraj.append(n_frames+accum_frames -1) #Starts by 0
            accum_frames+=n_frames
            cont_li=['sb', 'pc', 'ps', 'ts', 'vdw', 'hb', 'wb', 'wb2', 'hp']
            # one empty flare-plot skeleton per interaction type
            traj_int_d={e:dict() for e in cont_li}
            for int_d in traj_int_d.values():
                int_d['defaults']={'edgeColor': 'rgba(50,50,50,100)', 'edgeWidth': 2}
                int_d["trees"]=[{
                    "treeLabel" :'Helices',
                    "treePaths":set(), # ex. '2.1x30'
                    }]
                int_d["edges"]=dict() #list of {'name1':, 'frames':, 'name2':, 'helixpos':} -> helixpos can be Intra or Inter
                #ex: {"frames": [ 0, 10, 12], 'helixpos': 'Intra', 'name1': '5x38', 'name2': '5x39'}
                int_d["tracks"]=[
                    {"trackLabel":"Helices",
                     "trackProperties" :list() # {'color': '#79C268', 'nodeName': '3x25', 'size': '1.0'}
                    }
                ]
            all_int_d[traj["file_id"]]=traj_int_d
        gcdata_path="/protwis/sites/files/Precomputed/get_contacts_files/dynamic_symlinks/dyn%(dynid)s/dyn%(dynid)s_dynamic.tsv" % {"dynid" :dynfiles_traj[0]["dyn_id"]}
        traj_rep=0        # index of the trajectory the current frame belongs to
        pre_frame=False   # frame value of the previously accepted line
        accum_frames=0    # global frame number at which the current trajectory starts
        if os.path.isfile(gcdata_path):
            with open(gcdata_path) as infile:
                for line in infile:
                    line = line.strip()
                    if "total_frames" in line:
                        el = line.split(" ")
                        # NOTE(review): assigned but not read again in this function.
                        file_total_frames=int(el[1].split(":")[1])
                    if len(line) == 0 or line[0] == "#":
                        continue
                    allinfo = line.split("\t")
                    # Lines carry 4 to 6 columns; only the first four are used below.
                    if len(allinfo)==4:
                        (frame,int_type,res1,res2)=allinfo
                    elif len(allinfo)==5:
                        (frame,int_type,res1,res2,res3)=allinfo
                    elif len(allinfo)==6:
                        (frame,int_type,res1,res2,res3,res4)=allinfo
                    else:
                        self.stdout.write(self.style.NOTICE("Incorrect number of elements in line. Skipping. Line: %s"%line))
                        continue
                    if frame != pre_frame:
                        # Move on to the next trajectory once the previous
                        # frame was the last one of the current trajectory.
                        if int(pre_frame) == frame_ends_bytraj[traj_rep]:
                            traj_rep+=1
                            accum_frames=int(frame)
                            self.stdout.write(self.style.NOTICE("\tTraj id: %s"%dynfiles_traj[traj_rep]["file_id"]))
                        # frame number relative to the start of its trajectory
                        frame_corr=str(int(frame)-accum_frames)
                        traj_id=dynfiles_traj[traj_rep]["file_id"]
                        traj_int_d=all_int_d[traj_id]
                    #add all res:
                    resinfo1=extract_res_info(res1)
                    resinfo2=extract_res_info(res2)
                    if resinfo1 and resinfo2:
                        (gnum1,treep1,nodecolor1,h1)=resinfo1
                        (gnum2,treep2,nodecolor2,h2)=resinfo2
                    else:
                        continue
                    # Register both residues as nodes in the documents of
                    # every interaction type of this trajectory.
                    for int_typeid, int_data in traj_int_d.items():
                        if treep1 not in int_data["trees"][0]["treePaths"]:
                            int_data["trees"][0]["treePaths"].add(treep1) # ex. '2.1x30'
                            int_data["tracks"][0]["trackProperties"].append({'color': nodecolor1, 'nodeName': gnum1, 'size': '1.0'})
                        if treep2 not in int_data["trees"][0]["treePaths"]:
                            int_data["trees"][0]["treePaths"].add(treep2)
                            int_data["tracks"][0]["trackProperties"].append({'color': nodecolor2, 'nodeName': gnum2, 'size': '1.0'})
                    #add this particular interaction
                    # the three hbbb/hbsb/hbss types are merged into "hb"
                    if int_type in hb_list:
                        int_type="hb"
                    if int_type in traj_int_d:
                        edge_d=traj_int_d[int_type]["edges"]
                        # an edge is stored once, whichever order the pair arrives in
                        if (gnum1,gnum2) in edge_d:
                            edge_d[(gnum1,gnum2)]["frames"].append(frame_corr)
                        elif (gnum2,gnum1) in edge_d:
                            edge_d[(gnum2,gnum1)]["frames"].append(frame_corr)
                        else:
                            if (h1==h2):
                                hpos="Intra"
                            else:
                                hpos="Inter"
                            edge_d[(gnum1,gnum2)] = {'name1':gnum1 , 'name2':gnum2 , 'frames':[frame_corr], 'helixpos':hpos}
                    pre_frame=frame
            # Replace the set and the dict used while accumulating by
            # lists before dumping to JSON.
            for traj_id,traj_int_d in all_int_d.items():
                for int_type, int_data in traj_int_d.items():
                    int_data["trees"][0]["treePaths"] = list(int_data["trees"][0]["treePaths"])
                    int_data["edges"] = [v for k,v in int_data["edges"].items()]
                    save_json(dynfiles_traj,traj_id,int_type,int_data)
            return True
        else:
            return False

    def save_json(dynfiles_traj,traj_id,int_type,int_data):
        # Dump one document to "<trajectory name up to the first dot>_<type>.json"
        # inside the flare_plot directory of that interaction type,
        # creating the directory when needed.
        traj_filename=[e["file_name"] for e in dynfiles_traj if e['file_id']==traj_id][0]
        json_filename=traj_filename.split(".")[0] + "_" + int_type +".json"
        filpath=get_precomputed_file_path('flare_plot',int_type,url=False)
        if not os.path.isdir(filpath):
            os.makedirs(filpath)
        with open(os.path.join(filpath,json_filename), 'w') as outfile:
            json.dump(int_data, outfile)

    # Select the dynamics entries to process.
    if options['ignore_publication']:
        dynobj=DyndbDynamics.objects.all()
    else:
        dynobj=DyndbDynamics.objects.filter(is_published=True)
    if options['dynamics_id']:
        dynobj=dynobj.filter(id__in=options['dynamics_id'])
    # NOTE(review): dynobj is presumably a Django QuerySet, for which a
    # comparison with [] may never be true -- confirm.
    if dynobj == []:
        self.stdout.write(self.style.NOTICE("No dynamics found with specified conditions."))

    dynobj = dynobj.annotate(dyn_id=F('id'))
    dynobj_d = dynobj.values("id")
    dyn_id_li=[d["id"] for d in dynobj_d]
    dyn_traj_d={}
    i=0
    tot=len(dyn_id_li)
    for dyn_id in sorted(dyn_id_li):
        try:
            # NOTE(review): ``i`` only advances for entries that complete,
            # so the reported percentage lags behind after skipped entries.
            self.stdout.write(self.style.NOTICE("dyn %s - %.1f%% completed"%(dyn_id , (i/tot)*100) ))
            dynfiles = DyndbFilesDynamics.objects.filter(id_dynamics__id=dyn_id)
            dynfiles = dynfiles.annotate(file_name=F("id_files__filename"),file_path=F("id_files__filepath"),file_id=F('id_files__id'))
            dynfiles_traj = dynfiles.filter(type=2)
            dynfiles_traj = dynfiles_traj.values("file_name","file_path","file_id")
            print(dynfiles_traj)
            # Attach the dynamics id and the frame count to every trajectory record.
            for traj in dynfiles_traj:
                traj_path=traj["file_path"]
                traj["dyn_id"]=dyn_id
                if os.path.isfile(traj_path):
                    t=md.open(traj_path)
                    n_frames=t.__len__()
                    # the handle is dropped and collected instead of being closed explicitly
                    del t
                    gc.collect()
                    traj["n_frames"]=n_frames
                else:
                    traj["n_frames"]=False
            pos_to_gnum=create_pos_to_gnum(dyn_id)
            if not pos_to_gnum:
                self.stdout.write(self.style.ERROR("Labels file not found. Skipping." ))
                continue
            result=create_p_jsons(dynfiles_traj,pos_to_gnum)
            if not result:
                self.stdout.write(self.style.ERROR("GetContacts results file not found. Skipping." ))
                continue
            dyn_traj_d[dyn_id]=dynfiles_traj
            i+=1
        except Exception as e:
            self.stdout.write(self.style.ERROR(e))
    self.stdout.write(self.style.NOTICE("100%" ))
def calculate_grid_quantities(self, energy=True, entropy=True, hbonds=True):
    """
    Performs grid-based solvation thermodynamics and structure calculations
    by iterating over frames in the trajectory.

    ``self.num_frames`` frames starting at ``self.start_frame`` are read
    one at a time and handed to ``self._process_frame``; afterwards the
    accumulated columns of ``self.voxeldata`` are normalized in place.
    ``self.num_frames`` is lowered when the trajectory yields fewer frames
    than requested.

    Parameters
    ----------
    energy : bool, optional
        Forwarded to ``self._process_frame``.
    entropy : bool, optional
        Forwarded to ``self._process_frame``; when true,
        ``self.calculate_entropy`` is also called after normalization.
    hbonds : bool, optional
        Forwarded to ``self._process_frame``.

    Returns
    -------
    None
    """
    print_progress_bar(0, self.num_frames)
    # NOTE(review): ``topology`` stays undefined when the topology file ends
    # with ".h5" while the trajectory does not -- confirm this cannot happen.
    if not self.topology_file.endswith(".h5"):
        topology = md.load_topology(self.topology_file)
    read_num_frames = 0
    with md.open(self.trajectory) as f:
        for frame_i in range(self.start_frame, self.start_frame + self.num_frames):
            print_progress_bar(frame_i - self.start_frame, self.num_frames)
            f.seek(frame_i)
            # .h5 trajectories are read without an external topology
            if not self.trajectory.endswith(".h5"):
                trj = f.read_as_traj(topology, n_frames=1, stride=1)
            else:
                trj = f.read_as_traj(n_frames=1, stride=1)
            # an empty read is treated as the end of the trajectory
            if trj.n_frames == 0:
                print("No more frames to read.")
                break
            else:
                self._process_frame(trj, energy, hbonds, entropy)
                read_num_frames += 1
        # normalize by the number of frames actually processed
        if read_num_frames < self.num_frames:
            print(("{0:d} frames found in the trajectory, resetting self.num_frames.".format(read_num_frames)))
            self.num_frames = read_num_frames

    # Normalize voxel quantities
    # NOTE(review): the meaning of the voxeldata columns is not visible in
    # this block; column 4 is used as the per-voxel divisor and presumably
    # holds a count -- confirm against where voxeldata is defined.
    for voxel in range(self.voxeldata.shape[0]):
        if self.voxeldata[voxel, 4] > 1.0:
            # Columns 13 and 15: the following column receives the value
            # divided by twice column 4; the column itself is then divided
            # by twice (frames * voxel volume).
            self.voxeldata[voxel, 14] = self.voxeldata[voxel, 13] / (self.voxeldata[voxel, 4] * 2.0)
            self.voxeldata[voxel, 13] /= (self.num_frames * self.voxel_vol * 2.0)
            self.voxeldata[voxel, 16] = self.voxeldata[voxel, 15] / (self.voxeldata[voxel, 4] * 2.0)
            self.voxeldata[voxel, 15] /= (self.num_frames * self.voxel_vol * 2.0)
            # Column 17 is normalized with column 19, and only when
            # column 19 is positive.
            if self.voxeldata[voxel, 19] > 0.0:
                self.voxeldata[voxel, 18] = self.voxeldata[voxel, 17] / (self.voxeldata[voxel, 19] * 2.0)
                self.voxeldata[voxel, 17] /= (self.num_frames * self.voxel_vol * self.voxeldata[voxel, 19] * 2.0)
            # Odd columns 19..33: the following column receives the value
            # divided by column 4; the column itself is divided by
            # (frames * voxel volume).
            for i in range(19, 35, 2):
                self.voxeldata[voxel, i + 1] = self.voxeldata[voxel, i] / self.voxeldata[voxel, 4]
                self.voxeldata[voxel, i] /= (self.num_frames * self.voxel_vol)
        else:
            # Voxels at or below the threshold have these columns zeroed.
            self.voxeldata[voxel, 13] *= 0.0
            self.voxeldata[voxel, 15] *= 0.0
            if self.voxeldata[voxel, 19] > 0.0:
                self.voxeldata[voxel, 17] *= 0.0
            for i in range(19, 35, 2):
                self.voxeldata[voxel, i] *= 0.0

    # Calculate entropies
    if entropy:
        self.calculate_entropy(num_frames=self.num_frames)
def f():
    """Compare the length of the opened file with the loaded trajectory.

    ``file`` and ``kwargs`` are taken from the enclosing scope.  A
    ``NotImplementedError`` is turned into a skipped test.
    """
    try:
        opened = md.open(get_fn(file), **kwargs)
        try:
            n_opened = len(opened)
        finally:
            # Close the handle explicitly; the original left it open.
            opened.close()
        eq(n_opened,
           len(md.load(get_fn(file), top=get_fn('native.pdb'))))
    except NotImplementedError as e:
        raise SkipTest(e)
# Summarise, per project, how many trajectories and frames exist in total and
# how many remain after discarding trajectories at or below a minimum length,
# and print the summary as an HTML table.
import collections
import glob
import mdtraj as md
import os
import pandas as pd

# Multiplied with frame counts to obtain ``trimmed_ns``.
dt = 0.25

# Minimum trajectory length in frames; project 10478 uses a lower threshold.
MIN_LENGTHS = collections.defaultdict(lambda : 1000 * 4)
MIN_LENGTHS[10478] = 500 * 4

projects = [10466, 10467, 10468, 10478]
names = {10466:"T4", 10467:"src", 10468:"abl", 10478:"setd8"}

data = []
for project in projects:
    path = "%s/%d/" % (os.environ["FAH_DATA_PATH"], project)
    min_length = MIN_LENGTHS[project]

    filenames = glob.glob(path + "run*.h5")

    # Close every file once its length is known; the original kept one
    # open handle per trajectory.
    lengths = []
    for filename in filenames:
        with md.open(filename) as handle:
            lengths.append(len(handle))

    trimmed_lengths = [length for length in lengths if length > min_length]
    trimmed_ns = sum(trimmed_lengths) * dt

    n_traj = len(lengths)
    n_trimmed = len(trimmed_lengths)

    name = names[project]
    data.append(dict(project=project, frames=sum(lengths),
                     trimmed_frames=sum(trimmed_lengths),
                     trimmed_ns=trimmed_ns, n_trimmed=n_trimmed,
                     n_traj=n_traj, name=name))

data = pd.DataFrame(data).set_index("project")

# Call form of print: valid on Python 2 and Python 3 alike.
print(data.to_html())
def generate_clusters(self, density_factor, ligand_file, clustercenter_file):
    """Generate hydration sites from water molecules found in the binding site
    during the simulation. Clustering is done in two steps; i). An initial
    clustering over a 10% of frames, and ii). A refinement step where all
    frames are used. When ``clustercenter_file`` is given, step i) is skipped
    and the supplied centers are used without refinement.

    Parameters
    ----------
    density_factor : float
        Scales the minimum number of waters a cluster must contain; the
        cutoff is derived from ``n_frames * density_factor * 0.1401``.
    ligand_file : string
        Name of the PDB file containing atomic coordinates of the ligand,
        assumed to be co-crystallized with the protein.
    clustercenter_file : string or None
        PDB file with user-defined cluster centers. ``None`` triggers the
        initial clustering and the refinement step.

    Returns
    -------
    final_cluster_coords : numpy.ndarray
        Coordinates of hydration sites in Angstrom, represented by a 2-D
        array with shape N x 3, where N is the number of hydration sites.
    site_waters : list
        List of N sub-lists, one per hydration site. Each sub-list holds a
        2-element tuple for every water assigned to that site: the frame
        number (offset by ``self.start_frame``) and the index of the oxygen
        atom mapped back to the original topology.

    Raises
    ------
    ValueError
        If the system contains no solute atoms.
    SystemExit
        If fewer than 10 strided frames are available for initial clustering.

    Notes
    -----
    The following attributes of the object are updated:
    self.hsa_region_O_ids:
        Per frame, the original-topology indices of water oxygen atoms
        found in the HSA region.
    self.hsa_region_flat_ids:
        Per frame, running indices (step 3) for the same waters.
    self.hsa_region_water_coords:
        Zero-initialized array with 3 rows per HSA-region water; it is not
        filled in by this method.
    self.num_frames:
        Set or lowered to the number of frames actually read.
    self.clustercenter_file:
        Set to "clustercenterfile.pdb" after the centers are written out.
    """
    sphere_radius = md.utils.in_units_of(1.0, "angstroms", "nanometers")
    topology = md.load_topology(self.topology_file)
    if self.non_water_atom_ids.shape[0] == 0:
        # Previously ``raise Exception(ValueError, msg)``, which produced an
        # Exception whose args were (ValueError class, msg). ValueError is
        # still caught by ``except Exception``.
        raise ValueError(
            "Clustering is supported only for solute-solvent systems, no solute atoms found."
        )

    ligand = md.load_pdb(ligand_file, no_boxchk=True)
    ligand_coords = ligand.xyz[0, :, :]
    binding_site_atom_indices = np.asarray(
        list(range(ligand_coords.shape[0])))
    init_cluster_coords = None
    # Step 1: Initial Clustering if user didn't provide cluster centers
    if clustercenter_file is None:
        clustering_stride = 10
        print("Reading trajectory for clustering.")
        with md.open(self.trajectory) as f:
            f.seek(self.start_frame)
            # NOTE(review): the file is already positioned at start_frame, yet
            # the slices below start at self.start_frame again (and use
            # num_frames as an end index). Looks like a double offset --
            # confirm the intended frame window before changing.
            # read all frames if no frames specified by user
            if self.num_frames is None:
                trj_short = f.read_as_traj(
                    topology,
                    atom_indices=np.concatenate(
                        (binding_site_atom_indices,
                         self.wat_oxygen_atom_ids
                         )))[self.start_frame::clustering_stride]
            else:
                trj_short = f.read_as_traj(
                    topology,
                    atom_indices=np.concatenate((binding_site_atom_indices,
                                                 self.wat_oxygen_atom_ids))
                )[self.start_frame:self.num_frames:clustering_stride]
        print(trj_short.n_frames)
        if trj_short.n_frames < 10:
            sys.exit(
                "Clustering requires at least 100 frames, current trajectory contains {0:d} frames."
                .format(trj_short.n_frames))
        print("Performing an initial clustering over {0:d} frames.".
              format(trj_short.n_frames))

        # Obtain water molecules solvating the binding site
        # FIXME: This is a workaround to use MDTraj compute_neighbor function xyz coordinates of the trajectory are
        # modified such that first n atoms coordinates are switched to n atoms of ligand coordinates.
        # Unexpected things will happen if the number of solute atoms less than the number of ligand atoms, which is
        # highly unlikely.
        coords = trj_short.xyz
        for i_frame in range(trj_short.n_frames):
            for pseudo_index in range(
                    binding_site_atom_indices.shape[0]):
                coords[i_frame, pseudo_index, :] = ligand_coords[
                    pseudo_index, :]

        haystack = np.setdiff1d(trj_short.topology.select("all"),
                                binding_site_atom_indices)
        binding_site_waters = md.compute_neighbors(
            trj_short,
            self.hsa_region_radius,
            binding_site_atom_indices,
            haystack_indices=haystack)
        # generate a list of tuples, each tuple is a water and corresponding frame number in trj_short
        water_id_frame_list = [(i, nbr)
                               for i in range(len(binding_site_waters))
                               for nbr in binding_site_waters[i]]

        # Start initial clustering by building a KDTree and get initial neighbor count for all waters
        water_coordinates = np.ma.array(
            [coords[wat[0], wat[1], :] for wat in water_id_frame_list],
            mask=False)
        tree = spatial.cKDTree(water_coordinates)
        nbr_list = tree.query_ball_point(water_coordinates, sphere_radius)
        nbr_count_list = np.ma.array([len(nbrs) for nbrs in nbr_list],
                                     mask=False)
        # Round the cutoff to the nearest integer (ties go up).
        cutoff = trj_short.n_frames * density_factor * 0.1401
        if np.ceil(cutoff) - cutoff <= 0.5:
            cutoff = np.ceil(cutoff)
        else:
            cutoff = np.floor(cutoff)
        # Any value above the cutoff lets the loop run at least once.
        n_wat = 3 * cutoff
        # Set up clustering loop
        cluster_list = []
        cluster_iter = 0
        while n_wat > cutoff:
            # Get water with max nbrs and retrieve its neighbors and marked for exclusion in next iteration
            max_index = np.argmax(nbr_count_list)
            to_exclude = np.array(nbr_list[max_index])
            # Set current water count to current neighbors plus one for the water itself
            n_wat = len(to_exclude) + 1

            # Mask current water, its neighbors so that they are not considered in the next iteration
            nbr_count_list.mask[to_exclude] = True
            nbr_count_list.mask[max_index] = True
            # Mask current waters' and its neighbors' coords so that they are not considered in the next iteration
            water_coordinates.mask[to_exclude] = True
            water_coordinates.mask[max_index] = True

            # Accumulate neighbors for each water in current cluster, removing common neighbors
            nbrs_of_to_exclude = np.unique(
                np.array([
                    n_excluded for excluded_nbrs in nbr_list[to_exclude]
                    for n_excluded in excluded_nbrs
                ]))

            # Obtain the list of waters whose neighbors need to be updated due to exclusion of the waters above
            to_update = np.setxor1d(to_exclude, nbrs_of_to_exclude)
            to_update = np.setdiff1d(to_update, np.asarray(max_index))

            # Update the neighbor count for each water from the list generated above
            if to_update.shape[0] != 0:
                tree = spatial.cKDTree(water_coordinates)
                updated_nbr_list = tree.query_ball_point(
                    water_coordinates[to_update], sphere_radius)
                # for each updated member, get its original index and update the original neighbor search list
                for index, nbrs in enumerate(updated_nbr_list):
                    if not nbr_count_list.mask[to_update[index]]:
                        nbr_count_list[to_update[index]] = len(nbrs)

            # Check distances with previously identified clusters and do not consider if within 1.2 A
            # of an existing cluster
            current_wat = water_id_frame_list[max_index]
            current_wat_coords = md.utils.in_units_of(
                coords[current_wat[0], current_wat[1], :], "nanometers",
                "angstroms")
            near_flag = 0
            if len(cluster_list) != 0:
                for clust in cluster_list:
                    # Both points must be in Angstrom for the 1.2 A test;
                    # the existing center used to be left in nanometers.
                    clust_coords = md.utils.in_units_of(
                        coords[clust[0], clust[1], :], "nanometers",
                        "angstroms")
                    dist = np.linalg.norm(current_wat_coords -
                                          clust_coords)
                    if dist < 1.20:
                        near_flag += 1
            if near_flag == 0:
                cluster_iter += 1
                cluster_list.append(water_id_frame_list[max_index])
        init_cluster_coords = [
            coords[cluster[0], cluster[1], :] for cluster in cluster_list
        ]
    else:
        clusters_pdb_file = md.load_pdb(clustercenter_file, no_boxchk=True)
        init_cluster_coords = clusters_pdb_file.xyz[0, :, :]

    # Read full trajectory
    print("Reading trajectory to obtain water molecules for each cluster.")
    with md.open(self.trajectory) as f:
        f.seek(self.start_frame)
        if self.num_frames is None:
            trj = f.read_as_traj(topology,
                                 stride=1,
                                 atom_indices=np.concatenate(
                                     (binding_site_atom_indices,
                                      self.wat_oxygen_atom_ids)))
            self.num_frames = trj.n_frames
        else:
            trj = f.read_as_traj(topology,
                                 n_frames=self.num_frames,
                                 stride=1,
                                 atom_indices=np.concatenate(
                                     (binding_site_atom_indices,
                                      self.wat_oxygen_atom_ids)))
            if trj.n_frames < self.num_frames:
                print((
                    "Warning: {0:d} frames found in the trajectory, resetting self.num_frames."
                    .format(trj.n_frames)))
                self.num_frames = trj.n_frames

    # Same ligand-coordinate substitution as in the initial clustering.
    for i_frame in range(trj.n_frames):
        for pseudo_index in range(binding_site_atom_indices.shape[0]):
            trj.xyz[i_frame,
                    pseudo_index, :] = ligand_coords[pseudo_index, :]
    haystack = np.setdiff1d(trj.topology.select("all"),
                            binding_site_atom_indices)
    # First index in the trimmed trajectory that is not a pseudo-ligand atom.
    start_point = haystack[0]
    binding_site_waters = md.compute_neighbors(
        trj,
        self.hsa_region_radius,
        binding_site_atom_indices,
        haystack_indices=haystack)
    # From the full frame-wise set of waters in the binding site, build two more frame-wise lists
    # one where each frame has a correct index of waters and another with a new index which ranges from
    # 0 to M, where M is the total number of hsa region waters - 1
    start = 0
    for i in range(len(binding_site_waters)):
        self.hsa_region_O_ids.append([])
        self.hsa_region_flat_ids.append([])
        for wat in binding_site_waters[i]:
            # Map the trimmed-trajectory index back to the original topology.
            wat_0 = wat - start_point
            wat_offset = (
                wat_0 * self.water_sites) + self.wat_oxygen_atom_ids[0]
            self.hsa_region_O_ids[i].append(wat_offset)
            self.hsa_region_flat_ids[i].append(start)
            start += 3

    water_id_frame_list = [(i, nbr)
                           for i in range(len(binding_site_waters))
                           for nbr in binding_site_waters[i]]
    water_coordinates = np.array(
        [trj.xyz[wat[0], wat[1], :] for wat in water_id_frame_list])

    # Initialize array that stores coordinates all water molecules in HSA region, used for entropy calcs
    self.hsa_region_water_coords = np.zeros(
        (len(water_id_frame_list) * 3, 3), dtype=float)
    tree = spatial.cKDTree(water_coordinates)
    nbr_list = tree.query_ball_point(init_cluster_coords, sphere_radius)
    final_cluster_coords = []
    # NOTE(review): int() truncates first, so the ceil/floor below never
    # changes the value (it only turns it into a float). The initial
    # clustering rounds to nearest instead -- confirm which is intended.
    cutoff = int(self.num_frames * density_factor * 0.1401)
    if np.ceil(cutoff) - cutoff <= 0.5:
        cutoff = np.ceil(cutoff)
    else:
        cutoff = np.floor(cutoff)

    # apply refinement if user defined clusters not provided
    if clustercenter_file is None:
        # Step 2: Refinement
        # Initialize variables and data structures
        # Read in the trajectory but only first N solute atoms where N equals the number of ligand atoms
        # plus all water oxygen atoms
        # WARNING: This shifts indices of waters and once they are assigned to clusters, the indices need to
        # be corrected.
        print((
            "Refining initial cluster positions by considering {0:d} frames."
            .format(self.num_frames)))
        # For each cluster, set cluster center equal to geometric center of all waters in the cluster
        site_waters = []
        cluster_index = 1
        for cluster in nbr_list:
            cluster_water_coords = water_coordinates[cluster]
            if len(cluster) > cutoff:
                near_flag = 0
                waters_offset = [
                    (water_id_frame_list[wat][0] + self.start_frame,
                     ((water_id_frame_list[wat][1] - start_point) *
                      self.water_sites) + self.wat_oxygen_atom_ids[0])
                    for wat in cluster
                ]

                # Equal weights, i.e. the geometric center of the cluster.
                com = np.zeros(3)
                masses = np.ones(cluster_water_coords.shape[0])
                masses /= masses.sum()
                com[:] = water_coordinates[cluster].T.dot(masses)
                cluster_center = com[:]
                # Raise flag if the current cluster center is within 1.2 A of existing cluster center
                # NOTE(review): the [:-1] slice skips the most recently
                # accepted center -- confirm whether that is intended.
                for other, coord in enumerate(final_cluster_coords[:-1]):
                    dist = np.linalg.norm(
                        md.utils.in_units_of(cluster_center, "nanometers",
                                             "angstroms") - coord)
                    if dist < 1.20:
                        near_flag += 1
                # Only add cluster center if it is at a safe distance from others
                if near_flag == 0:
                    final_cluster_coords.append(
                        md.utils.in_units_of(cluster_center, "nanometers",
                                             "angstroms"))
                    site_waters.append(waters_offset)
                    cluster_index += 1
    # otherwise store data for each user defined cluster
    else:
        # User-defined centers are kept as given, converted to Angstrom.
        final_cluster_coords = md.utils.in_units_of(
            init_cluster_coords, "nanometers", "angstroms")
        site_waters = []
        cluster_index = 1
        for cluster in nbr_list:
            waters_offset = [
                (water_id_frame_list[wat][0] + self.start_frame,
                 ((water_id_frame_list[wat][1] - start_point) *
                  self.water_sites) + self.wat_oxygen_atom_ids[0])
                for wat in cluster
            ]
            site_waters.append(waters_offset)
            cluster_index += 1

    # Write clustercenter file
    write_watpdb_from_coords("clustercenterfile", final_cluster_coords)
    self.clustercenter_file = "clustercenterfile.pdb"
    print(("Final number of clusters: {0:d}".format(
        len(final_cluster_coords))))
    return np.asarray(final_cluster_coords), site_waters
import mdtraj as md
import os
import glob

filenames = glob.glob('trj*')

# Frame count of every trajectory, in the same order as ``filenames``.
lengths = []
for filename in filenames:
    # The context manager closes the file even if ``len`` raises.
    with md.open(filename) as f:
        lengths.append(len(f))

# Link every trajectory with more than 20 frames into the processed directory.
for index, (filename, length) in enumerate(zip(filenames, lengths)):
    if length > 20:
        # Printing the tuple keeps the "(index, length)" output and works on
        # both Python 2 and Python 3.
        print((index, length))
        os.symlink('../agonist_b2ar/%s' % filename,
                   '../agonist_b2ar_processed/%s' % filename)
Sire.Stream.save((molecules, space), s3file.val) # What to do with this... system = createSystemFreeEnergy(molecules) lam = Symbol("lambda") solutes = system[MGName("solutes")] solute_ref = system[MGName("solute_ref")] system.setConstant(lam, lambda_val.val) system.add(PerturbationConstraint(solutes)) system.setComponent(lam, lambda_val.val) # Now loop over snapshots in dcd and accumulate energies start_frame = 1 end_frame = 1000000000 step_frame = stepframe.val mdtraj_trajfile = mdtraj.open(trajfile.val, 'r') nframes = len(mdtraj_trajfile) if end_frame > (nframes - 1): end_frame = nframes - 1 mdtraj_trajfile.seek(start_frame) current_frame = start_frame #system = createSystemFreeEnergy(molecules) system_solute_rf = System() system_solute_rf.add(solutes) system_solute_rf.add(system[MGName("solute_ref")]) system_solute_rf.add(system[MGName("solute_ref_hard")]) system_solute_rf.add(system[MGName("solute_ref_todummy")]) system_solute_rf.add(system[MGName("solute_ref_fromdummy")])
def get_num_frames(trj_file, stride):
    """Return the number of frames obtained when reading *trj_file* with *stride*.

    The whole file is read with the given stride and the frames are counted
    from the first array returned by ``read`` (presumably the coordinates --
    confirm for the file formats in use).

    Parameters
    ----------
    trj_file : str
        Path of the trajectory file, passed to ``md.open``.
    stride : int or None
        Forwarded to the file object's ``read`` method.

    Returns
    -------
    int
        Length of the first axis of the first array returned by ``read``.
    """
    # Use a context manager so the file handle is released; it was previously
    # left open.
    with md.open(trj_file, mode="r", force_overwrite=False) as t:
        res = t.read(stride=stride)
    num_frames = np.shape(res[0])[0]
    return num_frames
def _merge_trajs(self) -> Tuple[dict, dict, int, dict]:
    """
    Load trajectories, thin them and merge them.
    Also calculate N_k for mbar.

    For every environment in ``self.envs`` the DCD file of each lambda state
    (``intst1`` .. ``intst<nr_of_states>`` below ``self.base_path``) is read.
    The start and stride are determined once by ``self._thinning`` on the very
    first trajectory and reused for all later ones, including those of other
    environments. Coordinates and unit cell lengths are divided by 10 before
    being collected.

    Returns
    -------
    snapshots : dict
        Maps each environment to the list of collected conformations.
    unitcell : dict
        Maps each environment to the list of collected unit cell lengths
        (zeros for the "vacuum" environment).
    nr_of_states : int
        Number of sub-directories directly below ``self.base_path``.
    N_k : dict
        Maps each environment to the number of snapshots per lambda state.

    Raises
    ------
    RuntimeError
        If ``self.base_path`` or a DCD file is missing, or if fewer than 10
        conformations remain for a lambda state.
    """
    #############
    # set all file paths for potential
    if not os.path.isdir(f"{self.base_path}"):
        raise RuntimeError(f"{self.base_path} does not exist. Aborting.")

    # Every sub-directory of base_path counts as one lambda state.
    nr_of_states = len(next(os.walk(f"{self.base_path}"))[1])

    logger.info(f"Evaluating {nr_of_states} states.")
    snapshots, unitcell = {}, {}
    N_k: dict = defaultdict(list)
    # -1 marks "not determined yet"; set from the first trajectory read.
    start, stride = -1, -1

    for env in self.envs:
        confs = []
        unitcell_ = []
        conf_sub = self.configuration["system"][self.structure][env]
        for lambda_state in tqdm(range(1, nr_of_states + 1)):
            dcd_path = f"{self.base_path}/intst{lambda_state}/{conf_sub['intermediate-filename']}.dcd"
            psf_path = f"{self.base_path}/intst{lambda_state}/{conf_sub['intermediate-filename']}.psf"
            if not os.path.isfile(dcd_path):
                raise RuntimeError(f"{dcd_path} does not exist.")

            # Open with a context manager so one handle per lambda state is
            # not leaked; everything that touches ``traj`` stays inside.
            with mdtraj.open(f"{dcd_path}") as traj:
                # read trajs, determin offset, start ,stride and unitcell lengths
                if start == -1:
                    # NOTE(review): xyz is thinned here but unitcell_lengths
                    # is not -- confirm the two stay aligned for this file.
                    xyz, unitcell_lengths, _ = traj.read()
                    xyz, start, stride = self._thinning(xyz)
                else:
                    traj.seek(start)
                    xyz, unitcell_lengths, _ = traj.read(stride=stride)

                xyz = xyz[:self.nr_of_max_snapshots]
                logger.debug(
                    f"Len: {len(xyz)}, Start: {start}, Stride: {stride}")

                # check that we have enough samples
                if len(xyz) < 10:
                    raise RuntimeError(
                        f"Below 10 conformations per lambda ({len(traj)}) -- decrease the thinning factor (currently: {self.thinning})."
                    )

            # thin unitcell_lengths
            # make sure that we can work with vacuum environments
            if env != "vacuum":
                unitcell_lengths = unitcell_lengths[:self.nr_of_max_snapshots]
            else:
                unitcell_lengths = np.zeros(len(xyz))
            confs.extend(xyz / 10)
            unitcell_.extend(unitcell_lengths / 10)
            logger.debug(f"{dcd_path}")
            logger.debug(f"Nr of snapshots: {len(xyz)}")
            N_k[env].append(len(xyz))
            self.traj_files[env].append((dcd_path, psf_path))

        logger.info(f"Combined nr of snapshots: {len(confs)}")
        snapshots[env] = confs
        unitcell[env] = unitcell_
        assert len(confs) == len(unitcell_)

    logger.debug(len(confs))
    logger.debug(N_k)
    return (snapshots, unitcell, nr_of_states, N_k)
""" Make trajectory symlinks in local trajectories directory. Looks for FAH data at the location given by environment variable FAH_DATA_PATH """ import glob import mdtraj as md import os RUN = None # Set to either None or the run number of interest. PROJECT = 10468 MIN_LENGTH = 1000 * 4 # kinase, T4 #MIN_LENGTH = 400 * 4 # setd8 PATH = "%s/%d/" % (os.environ["FAH_DATA_PATH"], PROJECT) filenames = [filename for filename in glob.glob(PATH + "run*.h5") if len(md.open(filename)) > MIN_LENGTH] try: os.mkdir("./trajectories/") except: pass for filename in filenames: if RUN is None or "run%d" % RUN in filename: base_filename = os.path.split(filename)[1] out_filename = "./trajectories/%s" % (base_filename) if not os.path.exists(out_filename): os.symlink(filename, out_filename)
def __determine_len(self, filename):
    """Open *filename* with mdtraj and return the length reported by the file object."""
    with mdtraj.open(filename) as trajectory_file:
        n_frames = len(trajectory_file)
    return n_frames
def process_chunk(self, begin_chunk, chunk_size, topology, energy, hbonds,
                  entropy):
    """Read a block of frames and accumulate per-voxel quantities in ``self.voxeldata``.

    Parameters
    ----------
    begin_chunk : int
        Frame index the trajectory file is positioned at before reading.
    chunk_size : int
        Number of frames requested from the trajectory.
    topology : object
        Topology passed on to ``read_as_traj``.
    energy : bool
        Together with ``hbonds`` decides whether distances and energies are
        evaluated; they are computed when either flag is true.
    hbonds : bool
        When true, hydrogen-bond counts are accumulated as well.
    entropy : bool
        When true, ``self.calculate_euler_angles`` is called for every water.

    Returns
    -------
    None
        ``self.voxeldata`` is updated in place: column 4 is incremented once
        per water, columns 11, 13, 15 and 17 receive energy/neighbor sums and
        columns 19, 23, 25, 27, 29, 31 and 33 receive hydrogen-bond data.
    """
    # Compared against values in distance_matrix, which are presumably
    # squared distances in Angstrom -- confirm against calc.get_pairwise_distances.
    nbr_cutoff_sq = 3.5**2
    with md.open(self.trajectory) as f:
        f.seek(begin_chunk)
        trj = f.read_as_traj(topology, n_frames=chunk_size, stride=1)
    # Same factor as the nm -> Angstrom conversion applied to the box below.
    trj.xyz *= 10.0
    pbc = md.utils.in_units_of(trj.unitcell_lengths, "nanometers",
                               "angstroms")
    # One list per frame, filled by calc.assign_voxels; each entry is indexed
    # below as (voxel id, water oxygen index).
    frame_data = [[] for i in range(trj.n_frames)]
    calc.assign_voxels(trj.xyz, self.dims, self.gridmax, self.origin,
                       frame_data, self.wat_oxygen_atom_ids)

    for frame in range(trj.n_frames):
        coords = trj.xyz[frame, :, :].reshape(1, trj.xyz.shape[1],
                                              trj.xyz.shape[2])
        # NOTE(review): periodic_box is computed but never used; the call to
        # calc.get_pairwise_distances below receives the all-frame ``pbc``.
        # Confirm which one the extension expects.
        periodic_box = pbc[frame].reshape(1, pbc.shape[1])
        waters = frame_data[frame]
        for wat in waters:
            self.voxeldata[wat[0], 4] += 1
            if energy or hbonds:
                # Work on copies; calc.calculate_energy receives these
                # arrays and the sums below are taken from them afterwards.
                e_lj_array, e_elec_array = np.copy(
                    self.acoeff), np.copy(self.chg_product)
                distance_matrix = np.zeros(
                    (self.water_sites, self.all_atom_ids.shape[0]))
                calc.get_pairwise_distances(wat, self.all_atom_ids, coords,
                                            pbc, distance_matrix)
                # Water oxygens within the cutoff, excluding zero distance
                # (the water itself).
                wat_nbrs = self.wat_oxygen_atom_ids[np.where(
                    (distance_matrix[0, :][self.wat_oxygen_atom_ids] <=
                     nbr_cutoff_sq) & (distance_matrix[0, :][
                         self.wat_oxygen_atom_ids] > 0.0))]
                self.voxeldata[wat[0], 17] += wat_nbrs.shape[0]
                calc.calculate_energy(wat[1], distance_matrix,
                                      e_elec_array, e_lj_array,
                                      self.bcoeff)
                # Columns before the first water oxygen index.
                self.voxeldata[wat[0], 11] += np.sum(
                    e_lj_array[:, :self.wat_oxygen_atom_ids[0]])
                self.voxeldata[wat[0], 11] += np.sum(
                    e_elec_array[:, :self.wat_oxygen_atom_ids[0]])
                # All water columns except those of the current water.
                self.voxeldata[wat[0], 13] += np.sum(
                    e_lj_array[:, self.wat_oxygen_atom_ids[0]:wat[1]]
                ) + np.sum(e_lj_array[:, wat[1] + self.water_sites:])
                self.voxeldata[wat[0], 13] += np.sum(
                    e_elec_array[:, self.wat_oxygen_atom_ids[0]:wat[1]]
                ) + np.sum(e_elec_array[:, wat[1] + self.water_sites:])
                # ``range`` replaces the Python-2-only ``xrange``.
                e_nbr_list = [
                    np.sum(e_lj_array[:, wat_nbrs + i] +
                           e_elec_array[:, wat_nbrs + i])
                    for i in range(self.water_sites)
                ]
                self.voxeldata[wat[0], 15] += np.sum(e_nbr_list)
                # H-bond calculations
                if hbonds:
                    prot_nbrs_all = self.non_water_atom_ids[np.where(
                        distance_matrix[0, :][
                            self.non_water_atom_ids] <= nbr_cutoff_sq)]
                    prot_nbrs_hb = prot_nbrs_all[np.where(
                        self.prot_hb_types[prot_nbrs_all] != 0)]
                    if wat_nbrs.shape[0] != 0 and prot_nbrs_hb.shape[
                            0] != 0:
                        # hb_ww, hb_sw = self.calculate_hydrogen_bonds2(coords, wat[1], wat_nbrs, prot_nbrs_hb)
                        hb_ww, hb_sw = self.calculate_hydrogen_bonds(
                            trj, wat[1], wat_nbrs, prot_nbrs_hb)
                        # Rows whose first column is the current water
                        # count as "acc"; the remaining rows as "don".
                        acc_ww = hb_ww[:, 0][np.where(
                            hb_ww[:, 0] == wat[1])].shape[0]
                        don_ww = hb_ww.shape[0] - acc_ww
                        acc_sw = hb_sw[:, 0][np.where(
                            hb_sw[:, 0] == wat[1])].shape[0]
                        don_sw = hb_sw.shape[0] - acc_sw
                        self.voxeldata[wat[0], 23] += hb_sw.shape[0]
                        self.voxeldata[wat[0], 25] += hb_ww.shape[0]
                        self.voxeldata[wat[0], 27] += don_sw
                        self.voxeldata[wat[0], 29] += acc_sw
                        self.voxeldata[wat[0], 31] += don_ww
                        self.voxeldata[wat[0], 33] += acc_ww
                        if wat_nbrs.shape[0] != 0 and hb_ww.shape[
                                0] != 0:
                            # NOTE(review): integer division under Python 2
                            # unless true division is enabled -- confirm.
                            self.voxeldata[wat[0], 19] += wat_nbrs.shape[
                                0] / hb_ww.shape[0]
                            # f_enc = 1.0 - (wat_nbrs.shape[0] / 5.25)
                            # if f_enc < 0.0:
                            #     f_enc = 0.0
                            # self.voxeldata[wat[0], 21] += f_enc
            if entropy:
                self.calculate_euler_angles(wat, coords[0, :, :])
import mdtraj as mdt

parser = arglib.ArgumentParser()
parser.add_argument('traj_dir', help='Directory to find trajectory files.')
parser.add_argument('conf_fn', help='Conformation filename that has the same atom names and residue IDs, etc. as the trajectories.')
# NOTE(review): 'output' is declared like a positional argument, in which case
# the default would never apply -- confirm how arglib treats it.
parser.add_argument('output', default='./ProjectInfo.yaml', help='Output filename [ ./ProjectInfo.yaml ]')
args = parser.parse_args()

traj_list = [os.path.join(args.traj_dir, fn) for fn in os.listdir(args.traj_dir)]
traj_list.sort(key=utils.keynat)
# = list.sort(traj_list, key=utils.keynat)
# Single-argument print calls behave the same on Python 2 and Python 3.
print(traj_list)

# Number of frames of every trajectory, in the order of ``traj_list``.
traj_lens = []
for i, traj_path in enumerate(traj_list):
    print(i)
    # Close each file after measuring it instead of leaking the handle.
    with mdt.open(traj_path) as traj_file:
        traj_lens.append(len(traj_file))

records = {
    'conf_filename': args.conf_fn,
    'traj_lengths': traj_lens,
    'traj_paths': traj_list,
    'traj_converted_from': [[] for fn in traj_list],
    'traj_errors': [None for fn in traj_list],
}
project = Project(records=records)
project.save(args.output)
def calculate_site_quantities(self,
                              energy=True,
                              entropy=True,
                              hbonds=True,
                              energy_lr_breakdown=False,
                              angular_structure=False,
                              shell_radii=None,
                              r_theta_cutoff=6.0):
    """
    Performs site-based solvation thermodynamics and structure calculations by iterating over frames in
    the trajectory. If water molecules in hydration sites are already determined (the case when clustering
    is already done), then the list of hydration site waters in each frame is used to iterate over each water
    and calculate its properties. If externally determined hydration sites are provided (when
    self.clustercenter_file is set to a pdb file of hydration sites) then for each site, corresponding
    water is found in each frame and is used for calculations.

    Parameters
    ----------
    energy : bool, optional
        Forwarded to ``self._process_frame`` for every frame.
    entropy : bool, optional
        Forwarded to ``self._process_frame``; when true, the entropy data
        are generated and the entropy scripts are run after the frame loop.
    hbonds : bool, optional
        Forwarded to ``self._process_frame`` for every frame.
    energy_lr_breakdown : bool, optional
        When true, ``self.energy_ww_lr_breakdown`` is initialized with one
        list of zeros per row of ``self.hsa_data``.
    angular_structure : bool, optional
        When true, ``self.angular_st_distribution`` is initialized with one
        empty list per row of ``self.hsa_data``.
    shell_radii : list of float, optional
        Exactly three radii, used only with ``energy_lr_breakdown``.
        Defaults to ``[3.5, 5.5, 8.5]``. The values are squared and a
        leading ``0.0`` is prepended before they are passed on.
    r_theta_cutoff : float, optional
        Passed to ``self._process_frame``; with ``angular_structure`` it is
        capped at 8.0.

    Returns
    -------
    None : NoneType
        This function updates hydration site data structures to
        store the results of calculations.

    Raises
    ------
    AssertionError
        If ``energy_lr_breakdown`` is set and ``shell_radii`` does not hold
        exactly three values.
    """
    print_progress_bar(0, self.num_frames)
    topology = md.load_topology(self.topology_file)
    read_num_frames = 0
    if energy_lr_breakdown:
        if shell_radii is None:
            shell_radii = [3.5, 5.5, 8.5]
        else:
            # Adjacent literals are concatenated, so the separating space
            # has to be part of the first one.
            assert len(shell_radii) == 3, "Water-water energy decomposition supported only upto 3 solvation shells. " \
                                          "Please provide outer radii for three shells."
        # A new list is built here, so a caller-supplied list is not mutated
        # by the insert below.
        shell_radii = [i**2 for i in shell_radii]
        shell_radii.insert(0, 0.0)
        self.energy_ww_lr_breakdown = [[
            0.0 for s in shell_radii
        ] for i in range(self.hsa_data.shape[0])]
    if angular_structure:
        if r_theta_cutoff > 8.0:
            print(
                "Warning: r_theta_cutoff > 8.0 can take a long time. "
                "Resetting angular structure distance cutoff to 8.0 Angstrom"
            )
            r_theta_cutoff = 8.0
        self.angular_st_distribution = [
            [] for i in range(self.hsa_data.shape[0])
        ]
    with md.open(self.trajectory) as f:
        for frame_i in range(self.start_frame,
                             self.start_frame + self.num_frames):
            print_progress_bar(frame_i - self.start_frame, self.num_frames)
            f.seek(frame_i)
            trj = f.read_as_traj(topology, n_frames=1, stride=1)
            # An empty read means the trajectory is shorter than requested.
            if trj.n_frames == 0:
                print("No more frames to read.")
                break
            self._process_frame(trj, frame_i, energy, hbonds, entropy,
                                energy_lr_breakdown, angular_structure,
                                shell_radii, r_theta_cutoff)
            read_num_frames += 1
    if read_num_frames < self.num_frames:
        print((
            "{0:d} frames found in the trajectory, resetting self.num_frames."
            .format(read_num_frames)))
        self.num_frames = read_num_frames
    if entropy:
        self.generate_data_for_entropycalcs(self.start_frame,
                                            self.num_frames)
        self.run_entropy_scripts()
    self.normalize_site_quantities(self.num_frames)