def load_traj(filepath: PathLike, **kwargs) -> mdtraj.Trajectory:
    """Load a trajectory; retry PDB files without the box check on failure.

    Files with a ``.pdb`` suffix (case-insensitive) are read with
    ``mdtraj.load_pdb``; every other file goes to ``mdtraj.load``.

    Parameters
    ----------
    filepath
        Location of the trajectory file.
    **kwargs
        Forwarded to the mdtraj loader. A ``top`` entry is dropped for PDB
        files.

    Returns
    -------
    mdtraj.Trajectory
        The loaded trajectory.
    """
    path = pathlib.Path(filepath)
    path_str = str(path)

    if path.suffix.lower() != ".pdb":
        return mdtraj.load(path_str, **kwargs)

    # `load_pdb` cannot take a topology, so silently discard one if given.
    kwargs.pop("top", None)

    try:
        return mdtraj.load_pdb(path_str, **kwargs)
    except FloatingPointError:
        # The PDB loader checks density using the simulation box, which can
        # fail when the box volume is zero; retry with that check disabled.
        trajectory = mdtraj.load_pdb(path_str, no_boxchk=True, **kwargs)
        logger.warning(
            "Unitcell has zero volume - periodic boundaries will not be accounted for. "
            "If the molecule is split by a periodic boundary, results will be incorrect."
        )
        return trajectory
def from_pdb(self, filename, selection=None, probe_radius=1.4, **kwargs):
    r"""Calculate solvent accessible surface area (SASA) from a PDB file

    If the PDB contains more than one structure, calculation is performed
    only for the first one.

    SASA units are Angstroms squared

    Parameters
    ----------
    filename: str
        Path to the PDB file
    selection: str
        Atomic selection for calculating SASA. All atoms considered if
        default None is passed. See the
        `selections page <https://www.mdanalysis.org/docs/documentation_pages/selections.html>`_
        for atom selection syntax.
    probe_radius: float
        The radius of the probe, in Angstroms
    kwargs: dict
        Optional arguments for the underlying mdtraj.shrake_rupley
        algorithm doing the actual SaSa calculation

    Returns
    -------
    self: :class:`~idpflex.properties.SaSa`
        Instantiated SaSa property object
    """  # noqa: E501
    # The selection is stored as given, before it is resolved to indices.
    self.selection = selection

    if selection is None:
        a_traj = mdtraj.load_pdb(filename)
    else:
        # First pass resolves the selection string to atomic indices; the
        # second pass loads only those atoms.
        atom_indices = mdtraj.load_pdb(filename).top.select(selection)
        a_traj = mdtraj.load_pdb(filename, atom_indices=atom_indices)

    return self.from_mdtraj(a_traj, probe_radius=probe_radius, **kwargs)
def verify_transform(pdb1, pdb2, lenVector):
    """Check that two PDB files hold the same system and return their coordinates.

    The process exits (via ``sys.exit()``) after printing a message when the
    two structures differ in atom or residue count, or when three times the
    atom count does not equal ``lenVector``.

    Args:
        pdb1: first file, in PDB format
        pdb2: second file, in PDB format
        lenVector: expected number of scalar coordinates (3 per atom)

    Returns:
        Tuple ``(xyz1, xyz2)`` of flat coordinate lists taken from the first
        frame of each file, scaled by 10 (nm to Angstrom).
    """
    # Both files are read with mdtraj.
    first = md.load_pdb(pdb1)
    second = md.load_pdb(pdb2)

    same_atoms = first.n_atoms == second.n_atoms
    same_residues = first.n_residues == second.n_residues
    if not (same_atoms and same_residues):
        print(
            "Pas le même nombre d'atomes/residues entre les deux structures !!"
        )
        print("Nombre d'atomes : {} vs {}".format(first.n_atoms, second.n_atoms))
        print("Nombre de résidues : {} vs {}".format(first.n_residues, second.n_residues))
        sys.exit()

    n_coordinates = first.n_atoms * 3
    if n_coordinates != lenVector:
        print(
            "Nombre de coordonnées du système: {} vs nombre de coordonnées normales{}"
            .format(n_coordinates, lenVector))
        print(
            "Pas le même nombre de coordonnées normales et de coordonnées atomiques !!"
        )
        sys.exit()

    def _flatten_to_angstrom(traj):
        # Flatten the first frame atom by atom, converting nm to Angstrom.
        flat = []
        for atom_xyz in traj[0].xyz[0]:
            for component in atom_xyz:
                flat.append(component * 10)
        return flat

    xyz1 = _flatten_to_angstrom(first)
    xyz2 = _flatten_to_angstrom(second)
    return (xyz1, xyz2)
def test_pdb_from_url():
    """Load 4K6Q from a URL (gzipped and plain) and check its dimensions."""
    urls = (
        'http://www.rcsb.org/pdb/files/4K6Q.pdb.gz',
        'http://www.rcsb.org/pdb/files/4K6Q.pdb',
    )
    loaded = [load_pdb(url) for url in urls]

    for traj in loaded:
        eq(traj.n_frames, 1)
    for traj in loaded:
        eq(traj.n_atoms, 2208)
def test_load_pdb_input_top(get_fn):
    """Loading with an explicit ``top`` must give the same xyz and topology."""
    path = get_fn('native.pdb')
    reference = load_pdb(path)
    reloaded = load_pdb(path, top=reference.topology)

    eq(reference.xyz, reloaded.xyz)
    eq(reference.topology, reloaded.topology)
def __init__(self, FG_fname, CG_fname):
    """Load the FG and CG PDB files, centre them, then parse, map and align.

    Parameters
    ----------
    FG_fname
        Path of the PDB file stored as ``self.FG_fname``.
    CG_fname
        Path of the PDB file stored as ``self.CG_fname``.
    """
    self.FG_fname, self.CG_fname = FG_fname, CG_fname

    fg_loaded = md.load_pdb(filename=self.FG_fname)
    self.FG_trj = fg_loaded.center_coordinates()

    cg_loaded = md.load_pdb(filename=self.CG_fname)
    self.CG_trj = cg_loaded.center_coordinates()

    # Post-processing steps, run in this fixed order.
    for step in (self._parse, self._create_mapping, self._align):
        step()
def loader(file):
    """Try to parse ``file`` as a PDB and print its path when parsing succeeds.

    The parsed trajectory is discarded; the function is only a "does it
    load?" probe.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.

    Returns
    -------
    None
        Always, whether or not the file could be loaded.
    """
    try:
        md.load_pdb(file)
    except Exception:
        # Best-effort probe: any loading error just means "skip this file".
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
        return None
    print(file)
    return None
def __init__(self, CG_pdb_f_name, AA_pdb_f_name):
    """Load the CG and AA structures without solvent and parse their beads.

    Parameters
    ----------
    CG_pdb_f_name
        Path of the PDB file stored as ``self.CG_pdb_f_name``.
    AA_pdb_f_name
        Path of the PDB file stored as ``self.AA_pdb_f_name``.
    """
    self.CG_pdb_f_name = CG_pdb_f_name
    self.AA_pdb_f_name = AA_pdb_f_name

    cg_loaded = md.load_pdb(filename=self.CG_pdb_f_name)
    self.CG_trj = cg_loaded.remove_solvent()
    self.CG_top = self.CG_trj.top

    aa_loaded = md.load_pdb(filename=self.AA_pdb_f_name)
    self.AA_trj = aa_loaded.remove_solvent()
    self.AA_top = self.AA_trj.top

    # Bead descriptions come from the project's own parsers, fed the same files.
    self.CG_beads = parse_CG_pdb(self.CG_pdb_f_name)
    self.AA_beads = parse_AA_pdb(self.AA_pdb_f_name)
def _construct_traj(self):
    """Load every model listed in ``self.df`` and join them into ``self.traj``.

    Models are read in row order from ``self.df.model_filepath``. Disulfide
    bonds are removed from each topology before the trajectories are
    combined with ``+=``.
    """
    filepaths = self.df.model_filepath
    n_models = len(filepaths)

    def _load_model(position):
        # Positional access (.iloc) throughout, so a DataFrame whose index
        # does not start at 0 (e.g. after filtering) is handled correctly.
        logger.debug('Loading Trajectory object for model {0} ({1}/{2})'.format(
            self.df.templateid.iloc[position], position, n_models))
        traj = mdtraj.load_pdb(filepaths.iloc[position])
        remove_disulfide_bonds_from_topology(traj.topology)
        return traj

    self.traj = _load_model(0)
    for position in range(1, n_models):
        self.traj += _load_model(position)
def makeDatabase(self, q_values, num_iter, lmax, save_path, beta=0.0, alpha=1.0, zeta=0.0):
    """Retrieve phases, compare the guess to the full model and save the result.

    Parameters
    ----------
    q_values
        Either the string ``'all'`` (use the models' existing q values) or
        the q values passed to ``slice_by_qvalues`` on both models.
    num_iter, lmax
        Stored on the instance as ``self.num_iter`` and ``self.lmax``.
    save_path
        Destination handed to ``self._saveDB``.
    beta, alpha, zeta
        Stored on the instance under the same names.

    Returns
    -------
    None
        Returns early, after printing a message, when the q values of
        ``self.model_full`` and ``self.model_guess`` do not match.
    """
    self.alpha = alpha
    self.beta = beta
    self.zeta = zeta
    self.q_values = q_values
    self.lmax = lmax
    self.num_iter = num_iter

    traj_full = mdtraj.load_pdb(self.traj_full_path)
    traj_guess = mdtraj.load_pdb(self.traj_guess_path)

    def _q_values_match():
        # Explicit check instead of `assert`, which is removed under `python -O`.
        return np.isclose(self.model_guess.q_values, self.model_full.q_values).all()

    if isinstance(q_values, str) and q_values == 'all':
        if not _q_values_match():
            print("q_values of the model_full and model_guess do not match. "
                  "Cannot use all q_values.")
            return
        # Use the models as they are; no slicing needed.
        self.q_values = self.model_full.q_values
    else:
        self.model_full.slice_by_qvalues(q_values=self.q_values, inplace=True)
        self.model_guess.slice_by_qvalues(q_values=self.q_values, inplace=True)
        if not _q_values_match():
            print("q_values of the model_full and model_guess do not match. "
                  "Failed!!!")
            return

    # retrieve the phases
    self._retrieve_phases()

    # compare guess to full model
    self._compare_full_guess()

    # compute RMSD; a ValueError from mdtraj.rmsd is recorded as an RMSD of 0
    try:
        self.rmsd = mdtraj.rmsd(traj_full, traj_guess, frame=0)
    except ValueError:
        self.rmsd = 0

    # save
    self._saveDB(save_path)
def test_segment_id(get_fn):
    """Segment ids must be read correctly and survive a PDB write/read cycle."""
    original = load_pdb(get_fn('ala_ala_ala.pdb'))
    original.save_pdb(temp)
    roundtrip = load_pdb(temp)

    expected_segment_id = 'AAL'

    # Every residue read from the source file carries the expected segment id.
    for ridx, residue in enumerate(original.top.residues):
        assert residue.segment_id == expected_segment_id, "residue %i (0-indexed) does not have segment_id set correctly from ala_ala_ala.pdb"%(ridx)

    # The ids are unchanged after writing out and reading back.
    residue_pairs = zip(original.top.residues, roundtrip.top.residues)
    for ridx, (before, after) in enumerate(residue_pairs):
        assert before.segment_id == after.segment_id, "segment_id of residue %i (0-indexed) in ala_ala_ala.pdb does not agree with value in after being written out to a new pdb file"%(ridx)
def test_1vii_url_and_gz():
    """1vii must load identically from URL or disk, gzipped or plain."""
    remote_gz = load_pdb('http://www.rcsb.org/pdb/files/1vii.pdb.gz')
    remote_plain = load_pdb('http://www.rcsb.org/pdb/files/1vii.pdb')
    local_gz = load_pdb(get_fn('1vii.pdb.gz'))
    local_plain = load_pdb(get_fn('1vii.pdb'))

    others = (remote_plain, local_gz, local_plain)

    eq(remote_gz.n_frames, 1)
    for other in others:
        eq(remote_gz.n_frames, other.n_frames)
    for other in others:
        eq(remote_gz.n_atoms, other.n_atoms)
def _get_models(self):
    """Load the first implicit and explicit refined models found on disk.

    Scans the immediate sub-directories of ``self.models_target_dir`` and
    fills ``self.model`` with at most one trajectory per key
    (``'implicit'`` / ``'explicit'``).
    """
    self.model = {}
    model_files = (
        ('implicit', 'implicit-refined.pdb.gz'),
        ('explicit', 'explicit-refined.pdb.gz'),
    )

    # Only the top level of the directory tree is inspected.
    _, dirnames, _ = next(os.walk(self.models_target_dir))

    for dirname in dirnames:
        if all(kind in self.model for kind, _ in model_files):
            break
        for kind, basename in model_files:
            if kind in self.model:
                continue
            candidate = os.path.join(self.models_target_dir, dirname, basename)
            if os.path.exists(candidate):
                self.model[kind] = mdtraj.load_pdb(candidate)
def func(arg):
    """Compute RMSDs of one XTC trajectory against several reference PDBs.

    Parameters
    ----------
    arg
        Sequence ``(xtcf, topf, reffs, p, sel)``: trajectory file, topology
        PDB, iterable of reference PDB files, a value repeated once per
        frame, and the atom selection string.

    Returns
    -------
    numpy.ndarray
        Transposed array with one column per reference RMSD plus a final
        column filled with ``p``.
    """
    xtcf, topf, reffs, p, sel = arg

    selected_atoms = md.load_pdb(topf).top.select(sel)
    frames = md.load_xtc(xtcf, topf).atom_slice(selected_atoms)

    columns = []
    for reff in reffs:
        ref_full = md.load_pdb(reff)
        # The selection is resolved against each reference's own topology.
        ref = ref_full.atom_slice(ref_full.top.select(sel))
        columns.append(md.rmsd(frames, ref))

    columns.append([p] * len(frames))
    return np.array(columns).T
def get(self, pdbid):
    """Download a PDB entry through ``self.conn`` and optionally load it.

    Parameters
    ----------
    pdbid : str
        Four-character PDB identifier (case-insensitive).

    Returns
    -------
    str or mdtraj.Trajectory
        Path of the downloaded ``.pdb.gz`` file when ``self.just_files`` is
        true; otherwise the trajectory loaded from the decompressed copy.

    Raises
    ------
    ValueError
        If ``pdbid`` is not exactly four characters long.
    """
    pdbid = pdbid.lower()
    if len(pdbid) != 4:
        raise ValueError("pdb id must be four characters long")

    filename = os.path.join(self.temp_dir, '%s.pdb.gz' % pdbid)
    # NOTE(review): `retrbinary` suggests self.conn is an ftplib.FTP connection.
    with open(filename, 'wb') as filehandler:
        self.conn.retrbinary(
            'RETR pub/pdb/data/structures/divided/pdb/%s/pdb%s.ent.gz' % (pdbid[1:3], pdbid),
            filehandler.write)

    if self.just_files:
        return filename

    # Decompress next to the archive (same name without ".gz"). Both files
    # are opened in binary mode: gzip yields bytes, which a text-mode file
    # rejects on Python 3.
    unzipped = filename[:-3]
    with gzip.open(filename, 'rb') as compressed, open(unzipped, 'wb') as plain:
        plain.write(compressed.read())

    return mdtraj.load_pdb(unzipped)
def clean_pdb(name, route=None, chain_num=None):
    """ Cleans the structure to only leave the important part.
        Inputs:
        * name: str. route of the input .pdb file
        * route: str. route of the output. will overwrite input if not provided
        * chain_num: int (or numeric str). index of the chain to keep; all
                     chains are kept when None.
        Output: route of destin file.
    """
    destin = route if route is not None else name

    # read input
    raw_prot = mdtraj.load_pdb(name)

    if chain_num is not None:
        # Callers may pass the index parsed from an id string; a str never
        # compares equal to the integer chain.index, so normalise it here.
        # NOTE(review): the original docstring called this "1-indexed as pdb
        # files", but it is compared against mdtraj's chain.index - confirm
        # which convention callers rely on.
        chain_num = int(chain_num)

    # collect the atom indices of the requested chain(s)
    idxs = []
    for chain in raw_prot.topology.chains:
        if chain_num is not None and chain_num != chain.index:
            continue
        chain_idxs = raw_prot.topology.select(f"chainid == {chain.index}")
        idxs.extend(chain_idxs.tolist())

    # sort: topology and xyz selection are ordered
    idxs = sorted(idxs)

    # build a new trajectory from the selected subset of indices and save it
    prot = mdtraj.Trajectory(xyz=raw_prot.xyz[:, idxs],
                             topology=raw_prot.topology.subset(idxs))
    prot.save(destin)
    return destin
def _load_traj(ag):
    """Convert ``ag`` to an mdtraj trajectory via a temporary PDB file.

    ``ag`` is written with ``prody.writePDB`` to a temporary ``.pdb`` file in
    the current directory, which is then read back with ``md.load_pdb``
    (``frame=0``).

    Returns
    -------
    mdtraj.Trajectory
        The trajectory read from the temporary file.
    """
    h, tmp = tempfile.mkstemp(dir='.', suffix='.pdb')
    os.close(h)
    try:
        prody.writePDB(tmp, ag)
        return md.load_pdb(tmp, frame=0)
    finally:
        # Always clean up, even if writing or loading raised.
        os.remove(tmp)
def test_4():
    """Simplified and full DSSP must agree in shape; simplified uses C/E/H."""
    traj = md.load_pdb(get_fn('1am7_protein.pdb'))
    simplified = md.compute_dssp(traj, simplified=True)
    detailed = md.compute_dssp(traj, simplified=False)

    assert len(simplified) == len(detailed)
    assert len(simplified[0]) == len(detailed[0])
    assert list(np.unique(simplified[0])) == ['C', 'E', 'H']
def custom2pdb(coords, proteinnet_id, route):
    """ Takes a custom representation and turns into a .pdb file.
        Inputs:
        * coords: array/tensor of shape (3 x N) or (N x 3). in Angstroms.
                  same order as in the proteinnnet is assumed (same as raw pdb file)
        * proteinnet_id: str. proteinnet id format (<class>#<pdb_id>_<chain_number>_<chain_id>)
                         see: https://github.com/aqlaboratory/proteinnet/
        * route: str. destin route.
        Output: tuple of routes: (original, generated) for the structures.
    """
    # convert to numpy
    if isinstance(coords, torch.Tensor):
        coords = coords.detach().cpu().numpy()

    # ensure (1, N, 3): transpose only when the last axis is not already xyz
    # (the previous test transposed N x 3 input into 3 x N)
    if coords.shape[-1] != 3:
        coords = coords.T
    # np.newaxis is an indexing constant, not a function; expand_dims adds
    # the leading frame axis.
    coords = np.expand_dims(coords, axis=0)

    # get pdb id and chain num
    pdb_name, chain_num = proteinnet_id.split("#")[-1].split("_")[:-1]
    pdb_destin = "/".join(route.split("/")[:-1])+"/"+pdb_name+".pdb"

    # download pdb file and select the appropriate chain
    download_pdb(pdb_name, pdb_destin)
    # The chain number is parsed from a string; pass it on as an integer so
    # it can match an integer chain index.
    # NOTE(review): confirm whether clean_pdb expects a 0- or 1-based index.
    clean_pdb(pdb_destin, chain_num=int(chain_num))

    # load trajectory scaffold and replace coordinates - assumes same order
    scaffold = mdtraj.load_pdb(pdb_destin)
    # NOTE(review): coords are documented as Angstroms while mdtraj keeps
    # xyz in nanometers - confirm whether a /10 conversion is needed here.
    scaffold.xyz = coords
    scaffold.save(route)
    return pdb_destin, route
def main(args):
    """Print per-frame donor-acceptor distance histograms for PDB entry 2EQQ.

    ``args`` is accepted but not used. For every frame, two grids are built
    with ``genAV`` around the CB atoms of residues 2 and 27, distances are
    sampled between them and a tab-separated histogram row is printed.
    Frames where either grid has no cell above 1.0 are skipped.
    """
    traj = md.load_pdb('http://www.rcsb.org/pdb/files/2EQQ.pdb')

    # strip sidechains of residues 2 and 27
    kept_atoms = traj.topology.select(
        'name CB CA N C or not (resSeq 2 or resSeq 27)')
    traj = traj.atom_slice(kept_atoms)

    bins = np.linspace(0.0, 80.0, 41)
    half_bin = (bins[1] - bins[0]) / 2.0
    bin_centers = bins[:-1] + half_bin

    # indices of the donor and acceptor attachment atoms
    topology = traj.topology
    donorAttIdx = topology.select('name CB and resSeq 2')[0]
    acceptorAttIdx = topology.select('name CB and resSeq 27')[0]

    # print header
    header_cells = ['{:.1f}'.format(center) for center in bin_centers]
    print('Frame#\t' + 'A\t'.join(header_cells))

    for frIdx in range(traj.n_frames):
        gridDonor = genAV(traj, frIdx, donorAttIdx,
                          20.0, 2.0, 3.5, 0.9, 0.3, 3.5 + 3.0)
        gridAcceptor = genAV(traj, frIdx, acceptorAttIdx,
                             22.0, 2.0, 3.5, 0.9, 0.4, 3.5 + 3.0)

        donorVolSize = np.count_nonzero(np.array(gridDonor.grid) > 1.0)
        accVolSize = np.count_nonzero(np.array(gridAcceptor.grid) > 1.0)
        if not (donorVolSize != 0 and accVolSize != 0):
            continue

        distances = ll.sampleDistanceDistInv(gridDonor, gridAcceptor, 1000000)
        freq, _ = np.histogram(distances, bins=bins)
        row_cells = [str(count) for count in freq]
        print('{}\t'.format(frIdx) + '\t'.join(row_cells))
def _construct_traj(self):
    """Load every model listed in ``self.df`` and join them into ``self.traj``.

    Models are read in row order from ``self.df.model_filepath``. Disulfide
    bonds are removed from each topology before the trajectories are
    combined with ``+=``.
    """
    filepaths = self.df.model_filepath
    n_models = len(filepaths)

    def _load_model(position):
        # Positional access (.iloc) throughout, so a DataFrame whose index
        # does not start at 0 (e.g. after filtering) is handled correctly.
        logger.debug(
            'Loading Trajectory object for model {0} ({1}/{2})'.format(
                self.df.templateid.iloc[position], position, n_models))
        traj = mdtraj.load_pdb(filepaths.iloc[position])
        remove_disulfide_bonds_from_topology(traj.topology)
        return traj

    self.traj = _load_model(0)
    for position in range(1, n_models):
        self.traj += _load_model(position)
def load_pdb(fn):
    """Load ``fn``, keep protein atoms only and return the last 20% of frames.

    The first kept frame is ``int(n_frames * 0.8)``.
    """
    full = mt.load_pdb(fn)
    protein_only = full.atom_slice(full.topology.select("protein"))
    first_kept = int(protein_only.n_frames * 0.8)
    return protein_only[first_kept:]
def metal_searcher(file):
    """Count single-atom metal residues in one PDB file.

    An atom is counted when its name equals its residue name and that name
    is listed in the module-level ``metals_list``.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.

    Returns
    -------
    dict
        Keys ``ok_file_count`` / ``error_file_count`` (0 or 1),
        ``metals_in_file`` (per-metal counts) and ``any_metal_in_file``
        (0 or 1). When the file cannot be loaded only the error flag is set.
    """
    metal_searcher_results = {
        'ok_file_count': 0,
        'error_file_count': 0,
        'metals_in_file': {metal: 0 for metal in metals_list},
        'any_metal_in_file': 0,
    }
    print(file)

    try:
        traj = md.load_pdb(file)
    except Exception:
        # Unreadable files are tallied, not fatal. `Exception` (rather than
        # a bare except) keeps Ctrl-C / SystemExit working.
        metal_searcher_results['error_file_count'] = 1
        return metal_searcher_results
    metal_searcher_results['ok_file_count'] = 1

    one_atom_residue_atoms = [
        atom.name for atom in traj.topology.atoms
        if atom.name == atom.residue.name and atom.name in metals_list
    ]
    if one_atom_residue_atoms:
        metal_searcher_results['any_metal_in_file'] = 1
        for atom_name in one_atom_residue_atoms:
            metal_searcher_results['metals_in_file'][atom_name] += 1

    print(metal_searcher_results)
    return metal_searcher_results
def get(self, pdbid):
    """Download a PDB entry through ``self.conn`` and optionally load it.

    Parameters
    ----------
    pdbid : str
        Four-character PDB identifier (case-insensitive).

    Returns
    -------
    str or mdtraj.Trajectory
        Path of the downloaded ``.pdb.gz`` file when ``self.just_files`` is
        true; otherwise the trajectory loaded from the decompressed copy.

    Raises
    ------
    ValueError
        If ``pdbid`` is not exactly four characters long.
    """
    pdbid = pdbid.lower()
    if len(pdbid) != 4:
        raise ValueError("pdb id must be four characters long")

    filename = os.path.join(self.temp_dir, '%s.pdb.gz' % pdbid)
    # NOTE(review): `retrbinary` suggests self.conn is an ftplib.FTP connection.
    with open(filename, 'wb') as filehandler:
        self.conn.retrbinary('RETR pub/pdb/data/structures/divided/pdb/%s/pdb%s.ent.gz' %
                             (pdbid[1:3], pdbid), filehandler.write)

    if self.just_files:
        return filename

    # Decompress next to the archive (same name without ".gz"). Both files
    # are opened in binary mode: gzip yields bytes, which a text-mode file
    # rejects on Python 3.
    unzipped = filename[:-3]
    with gzip.open(filename, 'rb') as compressed, open(unzipped, 'wb') as plain:
        plain.write(compressed.read())

    return mdtraj.load_pdb(unzipped)
def test_3(tmpdir):
    """Compare ``call_dssp`` with ``md.compute_dssp`` on entries fetched by URL."""
    # 1COY gives a small error, due to a broken chain.
    for pdbid in ('1GAI', '6gsv', '2AAC'):
        url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
        full = md.load_pdb(url)
        minimal = full.atom_slice(full.top.select_atom_indices('minimal'))

        reference = call_dssp(tmpdir, minimal)
        computed = md.compute_dssp(minimal, simplified=False)[0]
        assert_(reference, computed)
def _execute(self, directory, available_resources):
    """Extract the host from the complex, prepare it and write ``output.pdb``.

    Parameters
    ----------
    directory
        Directory in which ``host_input.pdb`` and ``output.pdb`` are written.
    available_resources
        Not used by this method.
    """
    import mdtraj
    from paprika.evaluator import Setup
    from simtk.openmm import app

    complex_trajectory = mdtraj.load_pdb(self.complex_file_path)

    # Only the atoms with the receptor role make up the host.
    receptor_indices = _atom_indices_by_role(
        self.substance, self.complex_file_path
    )[Component.Role.Receptor]

    host_file_path = os.path.join(directory, "host_input.pdb")
    complex_trajectory.atom_slice(receptor_indices).save(host_file_path)

    host_structure = Setup.prepare_host_structure(host_file_path)

    self.output_coordinate_path = os.path.join(directory, "output.pdb")
    with open(self.output_coordinate_path, "w") as file:
        # NOTE(review): the trailing positional True is presumably OpenMM's
        # keepIds flag - confirm against PDBFile.writeFile.
        app.PDBFile.writeFile(
            host_structure.topology, host_structure.positions, file, True
        )
def test_1():
    """Yield one DSSP comparison check per bundled PDB file.

    Each yielded callable compares ``call_dssp`` against
    ``md.compute_dssp(..., simplified=False)[0]`` for one structure.
    """
    for fn in ['1bpi.pdb', '1vii.pdb', '4K6Q.pdb', '1am7_protein.pdb']:
        t = md.load_pdb(get_fn(fn))
        t = t.atom_slice(t.top.select_atom_indices('minimal'))
        # Bind the trajectory as a default argument: a plain closure would
        # see whatever `t` holds when the check finally runs, i.e. the last
        # file if the generator is consumed before the checks are called.
        f = lambda t=t: assert_(call_dssp(t), md.compute_dssp(t, simplified=False)[0])
        f.description = 'test_1: %s' % fn
        yield f
def res_seq(pdb):
    """Return the residue sequence of a PDB file mapped through ``long2short``.

    Each residue is converted to its string form; the first three characters
    of every label at least three characters long are passed to
    ``long2short``.
    """
    structure = mt.load_pdb(pdb)
    labels = [str(residue) for residue in structure.topology.residues]
    three_letter_codes = [label[:3] for label in labels if len(label) >= 3]
    return [long2short(code) for code in three_letter_codes]
def main():
    """Merge the selected atoms of several XTC files into one aligned DCD.

    Inputs come from ``parse_args()`` as ``(inp, topf, sel, outn, dt)``.
    ``inp`` is either a directory whose entries are all treated as
    trajectory files (a topology file is then mandatory) or a pickled
    pandas table with ``top_fn`` and ``traj_fn`` columns. Writes
    ``<outn>.pdb`` (selected topology) and ``<outn>.dcd`` (all frames
    superposed on it).

    Raises
    ------
    SystemExit
        With an error message (non-zero exit status) when the topology file
        is missing or no trajectory file is found.
    """
    inp, topf, sel, outn, dt = parse_args()

    if os.path.isdir(inp):
        xtcfs = sorted(os.path.join(inp, entry) for entry in os.listdir(inp))
        if topf is None:
            # Raising SystemExit with a message exits with status 1; the
            # previous `exit(0)` reported success to the calling shell.
            raise SystemExit("Error: top file is required!")
    else:
        meta = pd.read_pickle(inp)
        topf = meta["top_fn"].values[0]
        xtcfs = meta["traj_fn"].values

    if len(xtcfs) == 0:
        # Without this guard the superpose step below fails on None.
        raise SystemExit("Error: no trajectory files found in %s" % inp)

    top = md.load_pdb(topf)
    ndx = top.top.select(sel)
    top = top.atom_slice(ndx)

    xtcall = None
    for xtcf in xtcfs:
        xtc = md.load_xtc(xtcf, top=topf, atom_indices=ndx, stride=dt)
        if xtcall is None:
            xtcall = xtc
        else:
            # Share one topology object across all pieces before joining.
            xtc.topology = xtcall.topology
            xtcall = xtcall.join(xtc)

    outtop = "%s.pdb" % outn
    outdcd = "%s.dcd" % outn
    top.save_pdb(outtop)
    xtcall.superpose(top)
    xtcall.save_dcd(outdcd)
def make_perturbed_traj(traj_path, save_path, f_perturb):
    """Save a copy of a PDB structure with randomly shifted coordinates.

    Every coordinate is shifted by a uniform random number in [-1, 1)
    multiplied by ``f_perturb`` and by the mean squared distance of the
    first frame's atoms from their centroid.

    Parameters
    ----------
    traj_path
        PDB file to read.
    save_path
        Destination passed to ``Trajectory.save``.
    f_perturb
        Scale factor applied to the random shifts.
    """
    traj = mdtraj.load_pdb(traj_path)

    first_frame = traj.xyz[0]
    centered = first_frame - first_frame.mean(0)
    # Mean over atoms of the squared distance to the centroid.
    mean_dist = np.mean(np.sum(centered**2, axis=1), axis=0)

    n_atoms, n_dims = traj.xyz.shape[1], traj.xyz.shape[2]
    unit_noise = np.random.rand(n_atoms, n_dims)*2 - 1
    shifts = unit_noise * f_perturb * mean_dist

    traj.xyz += shifts
    traj.save(save_path)
def test_refine_explicit_md_short():
    """Run a very short explicit-solvent refinement and check its outputs.

    Verifies that the refined model, energies and log files exist, that the
    log reports a finished, successful run, and that the model loads.
    """
    with integrationtest_context(set_up_project_stage='solvated'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_explicit_md(process_only_these_targets=[targetid],
                           process_only_these_templates=[templateid],
                           sim_length=2.0 * unit.femtosecond,
                           nsteps_per_iteration=1,
                           verbose=True)

        template_dir = os.path.join(default_project_dirnames.models, targetid, templateid)
        explicit_model_filepath = os.path.join(template_dir, 'explicit-refined.pdb.gz')
        explicit_energies_filepath = os.path.join(template_dir, 'explicit-energies.txt')
        explicit_log_filepath = os.path.join(template_dir, 'explicit-log.yaml')

        assert all(
            map(os.path.exists, [
                explicit_model_filepath, explicit_energies_filepath,
                explicit_log_filepath
            ]))

        with open(explicit_log_filepath) as explicit_log_file:
            # safe_load: plain yaml.load needs an explicit Loader on current
            # PyYAML and can construct arbitrary objects on older versions.
            explicit_log = yaml.safe_load(explicit_log_file)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True

        # Smoke check: the refined model must be loadable.
        mdtraj.load_pdb(explicit_model_filepath)
def metal_searcher(file):
    """List the atoms of one PDB file whose name equals their residue name.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.

    Returns
    -------
    dict
        ``one_atom_residue_names`` (list of atom names as str) plus the 0/1
        flags ``ok_file_count`` and ``error_file_count``. When the file
        cannot be loaded the list stays empty and only the error flag is set.
    """
    metal_searcher_results = {
        'one_atom_residue_names': [],
        'ok_file_count': 0,
        'error_file_count': 0,
    }
    print(file)

    try:
        traj = md.load_pdb(file)
    except Exception:
        # Unreadable files are tallied, not fatal. `Exception` (rather than
        # a bare except) keeps Ctrl-C / SystemExit working.
        metal_searcher_results['error_file_count'] = 1
        return metal_searcher_results
    metal_searcher_results['ok_file_count'] = 1

    metal_searcher_results['one_atom_residue_names'].extend(
        str(atom.name) for atom in traj.topology.atoms
        if atom.name == atom.residue.name)

    print(metal_searcher_results)
    return metal_searcher_results
def fasta_dict(fasta_dir, prot_ids):
    """Map protein ids to FASTA sequences, converting PDB files when needed.

    For each id ``p``, the sequence is read from ``<fasta_dir>/<p>.fasta``.
    If that file is missing but ``<fasta_dir>/<p>_protein.pdb`` exists, the
    FASTA file is first generated from the PDB topology.

    Parameters
    ----------
    fasta_dir
        Directory containing the ``.fasta`` and ``_protein.pdb`` files.
    prot_ids
        Iterable of protein ids.

    Returns
    -------
    dict
        ``{id: sequence}``; the sequence is ``"X"`` when no FASTA file is
        available for an id.
    """
    fasta_seq_dict = {}
    for p in prot_ids:
        pdb_fn = os.path.join(fasta_dir, p + "_protein.pdb")
        fn = os.path.join(fasta_dir, p + ".fasta")

        # Convert only when the PDB exists and the FASTA does not. The old
        # test compared the PDB path with itself, so it could never be true,
        # and the output was aimed at the PDB file instead of the FASTA file.
        if os.path.exists(pdb_fn) and not os.path.exists(fn):
            try:
                traj = mt.load_pdb(pdb_fn)
                fasta = "".join(traj.topology.to_fasta())
                print("Converting PDB to fasta")
                with open(fn, 'w') as tof:
                    tof.write(">%s\n" % p)
                    tof.write("%s\n" % fasta)
            except Exception:
                # Best effort: a failed conversion falls through to the
                # "fasta file not exists" branch below.
                print("Doesn't find pdb or fasta file......")

        if os.path.exists(fn):
            fasta_seq_dict[p] = get_fasta_seq(fn)
        else:
            print("fasta file not exists: ", fn)
            fasta_seq_dict[p] = "X"
    return fasta_seq_dict
def loader(file):
    """Print ``file`` and report it back if it cannot be parsed as a PDB.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.

    Returns
    -------
    The ``file`` argument when loading fails, otherwise ``None``.
    """
    print(file)
    try:
        md.load_pdb(file)
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still
        # interrupt a long scan.
        return file
    return None
def metal_searcher(file):
    """List the atoms of one PDB file whose name equals their residue name.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.

    Returns
    -------
    dict
        ``one_atom_residue_names`` (list of atom names as str) plus the 0/1
        flags ``ok_file_count`` and ``error_file_count``. When the file
        cannot be loaded the list stays empty and only the error flag is set.
    """
    metal_searcher_results = {
        'one_atom_residue_names': [],
        'ok_file_count': 0,
        'error_file_count': 0,
    }
    print(file)

    try:
        traj = md.load_pdb(file)
    except Exception:
        # Unreadable files are tallied, not fatal. `Exception` (rather than
        # a bare except) keeps Ctrl-C / SystemExit working.
        metal_searcher_results['error_file_count'] = 1
        return metal_searcher_results
    metal_searcher_results['ok_file_count'] = 1

    for atom in traj.topology.atoms:
        if atom.name == atom.residue.name:
            metal_searcher_results['one_atom_residue_names'].append(
                str(atom.name))

    print(metal_searcher_results)
    return metal_searcher_results
def test_solvate_existing_structure_protocol():
    """Tests solvating a single methanol molecule in water."""
    import mdtraj

    methanol_component = Component("CO")

    solute = Substance()
    solute.add_component(methanol_component, ExactAmount(1))

    solvent = Substance()
    solvent.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as workdir:

        # Step 1: build coordinates for the lone methanol molecule.
        build_step = BuildCoordinatesPackmol("build_methanol")
        build_step.max_molecules = 1
        build_step.substance = solute
        build_step.execute(workdir, ComputeResources())

        methanol_residue_name = build_step.assigned_residue_names[
            methanol_component.identifier]

        # Step 2: add up to nine waters around the existing structure.
        solvate_step = SolvateExistingStructure("solvate_methanol")
        solvate_step.max_molecules = 9
        solvate_step.substance = solvent
        solvate_step.solute_coordinate_file = build_step.coordinate_file_path
        solvate_step.execute(workdir, ComputeResources())

        solvated_system = mdtraj.load_pdb(solvate_step.coordinate_file_path)

        assert solvated_system.n_residues == 10
        assert solvated_system.top.residue(0).name == methanol_residue_name
def test_refine_explicit_md_short():
    """Run a very short explicit-solvent refinement and check its outputs.

    Verifies that the refined model, energies and log files exist, that the
    log reports a finished, successful run, and that the model loads.
    """
    with integrationtest_context(set_up_project_stage='solvated'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_explicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0*unit.femtosecond,
            nsteps_per_iteration=1,
            verbose=True
        )

        explicit_model_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'explicit-refined.pdb.gz'
        )
        explicit_energies_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'explicit-energies.txt'
        )
        explicit_log_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'explicit-log.yaml'
        )

        assert all(map(
            os.path.exists,
            [explicit_model_filepath, explicit_energies_filepath, explicit_log_filepath]
        ))

        with open(explicit_log_filepath) as explicit_log_file:
            # safe_load: plain yaml.load needs an explicit Loader on current
            # PyYAML and can construct arbitrary objects on older versions.
            explicit_log = yaml.safe_load(explicit_log_file)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True

        # Smoke check: the refined model must be loadable.
        mdtraj.load_pdb(explicit_model_filepath)
def __init__(self, PDB_filename, free_energy, expdata_filename=None, use_log_normal_distances=False,
             dloggamma=np.log(1.01), gamma_min=0.2, gamma_max=10.0):
    """Initialize the class.

    INPUTS

    PDB_filename    Path of a PDB file; it is loaded with mdtraj and kept
                    as ``self.conf`` with coordinates scaled by 10.
                    NOTE: For cases where the structure is an ensemble (say,
                    from clustering) and the modeled NOE distances and
                    coupling constants are averaged, the structure itself
                    can just be a placeholder with the right atom names and
                    numbering
    free_energy     The (reduced) free energy f = beta*F of this conformation
    expdata_filename
                    Optional experimental data file; when given it is read
                    with ``self.load_expdata``
    use_log_normal_distances
                    Flag to use log-normal distance errors log(d/d0)
    dloggamma       Step, in log space, of the allowed gamma grid
    gamma_min       Lower bound of the allowed gamma grid
    gamma_max       Upper bound (exclusive) of the allowed gamma grid
    """
    self.PDB_filename = PDB_filename
    self.expdata_filename = expdata_filename
    self.conf = mdtraj.load_pdb(PDB_filename)
    # Convert the coordinates from nm to Angstrom units
    self.conf.xyz = self.conf.xyz*10.0

    # The (reduced) free energy f = beta*F of this structure, as predicted by modeling
    self.free_energy = free_energy

    # Flag to use log-normal distance errors log(d/d0)
    self.use_log_normal_distances = use_log_normal_distances

    # Store info about gamma^(-1/6) scaling parameter array
    self.dloggamma = dloggamma
    self.gamma_min = gamma_min
    self.gamma_max = gamma_max
    self.allowed_gamma = np.exp(np.arange(np.log(self.gamma_min), np.log(self.gamma_max), self.dloggamma))

    # Store distance restraint info
    self.distance_restraints = []
    self.distance_equivalency_groups = {}
    self.ambiguous_groups = []  # list of pairs of group indices, e.g.: [ [[1,2,3],[4,5,6]], [[7],[8]], ...]
    self.ndistances = 0

    # Store dihedral restraint info
    self.dihedral_restraints = []
    self.dihedral_equivalency_groups = {}
    self.dihedral_ambiguity_groups = {}
    self.ndihedrals = 0

    # Create a KarplusRelation object
    self.karplus = KarplusRelation()

    # variables to store pre-computed SSE and effective degrees of freedom (d.o.f.)
    self.sse_distances = np.zeros(len(self.allowed_gamma))  # one entry per allowed gamma
    self.Ndof_distances = None
    self.sse_dihedrals = None
    self.Ndof_dihedrals = None
    self.betas = None  # if reference is used, an array of N_j betas for each distance
    self.neglog_reference_priors = None
    self.sum_neglog_reference_priors = 0.0

    # If an experimental data file is given, load in the information
    if expdata_filename is not None:
        self.load_expdata(expdata_filename)
def test_topology(get_fn):
    """A topology written to an HDF5 trajectory file must read back equal."""
    reference_top = md.load_pdb(get_fn('native.pdb')).topology

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.topology = reference_top

    with HDF5TrajectoryFile(temp) as reader:
        assert reader.topology == reference_top
def test_load_trajectory(get_fn):
    """Compare a TNG file to the PDB file it was created from."""
    from_pdb = md.load_pdb(get_fn('frame0.pdb'))
    from_tng = md.load_tng(get_fn('frame0.tng'), top=from_pdb.topology)

    for attribute in ('n_frames', 'unitcell_vectors', 'xyz'):
        eq(getattr(from_pdb, attribute), getattr(from_tng, attribute))
def test_3nch_conect():
    """CONECT records that use all available digits must parse correctly."""
    # This has conect entries that use all available digits, good failure case.
    traj = load_pdb(get_fn('3nch.pdb.gz'))
    _, bond_rows = traj.top.to_dataframe()
    bonds = {(a, b): 1 for (a, b) in bond_rows}

    # Check that last SO4 molecule has right bonds
    for partner in (19783, 19784, 19785, 19786):
        eq(bonds[19782, partner], 1)
def _store_highest_seqid_model(self):
    """Pick a reference model by ``seqid`` order and load it.

    Templates are visited in the order produced by sorting ``self.df`` on
    ``seqid``; the first one whose model file exists becomes the reference
    (``self.ref_modelid``, ``self.ref_model_filepath``), which is then
    loaded into ``self.ref_model_traj``.
    """
    # DataFrame.sort was removed from pandas (0.20); use sort_values when
    # available and fall back only on very old pandas versions.
    if hasattr(self.df, 'sort_values'):
        sorted_df = self.df.sort_values('seqid')
    else:
        sorted_df = self.df.sort('seqid')

    # NOTE(review): the sort is ascending, so the first hit is the *lowest*
    # seqid with an existing file, despite the method name - confirm intent.
    for modelid in sorted_df.templateid:
        model_filepath = os.path.join(self.models_target_dir, modelid, self.model_filename)
        if os.path.exists(model_filepath):
            self.ref_modelid = modelid
            self.ref_model_filepath = model_filepath
            break

    self.ref_model_traj = mdtraj.load_pdb(self.ref_model_filepath)
def _align_structures(self):
    """Align every structure to the first structure of its grid row.

    ``self.structure_filepaths`` is read as a flat, row-major grid of
    ``self.nrows`` x ``self.ncols`` entries. For each row the first entry is
    loaded as reference and its non-"H"-named atoms are selected, then
    ``self._align_structure`` is called for every entry in the row.
    """
    for irow in range(self.nrows):
        row_start = irow * self.ncols
        row_stop = row_start + self.ncols

        self._ref_traj = mdtraj.load_pdb(self.structure_filepaths[row_start])
        self._heavy_atoms = self._ref_traj.topology.select('not name H')

        for alignment_index in range(row_start, row_stop):
            self._align_structure(
                self.structure_filepaths[alignment_index], alignment_index)
def __init__(self, PDB_filename, lam, free_energy, data=None,
             use_log_normal_noe=False, dloggamma=np.log(1.01), gamma_min=0.2, gamma_max=10.0):
    """Initialize the Restraint class.

    INPUTS

    PDB_filename        A topology file (*.pdb)
    lam                 lambda value (between 0 and 1); scales free_energy
    free_energy         The (reduced) free energy f = beta*F of this conformation
    data                input data for BICePs (both model and exp)
    use_log_normal_noe  Flag to use log-normal distance errors log(d/d0)
    dloggamma           step of the gamma grid, in log space
    gamma_min           min value of gamma
    gamma_max           max value of gamma (exclusive)
    """
    # Restraint bookkeeping
    self.restraints = []   # data container objects, one per restraint (e.g. NMR_Chemicalshift_Ca())
    self.n = 0
    self.sse = 0
    self.Ndof = None

    # Exponential reference potential
    self.betas = None
    self.neglog_exp_ref = None
    self.sum_neglog_exp_ref = 0.0

    # Gaussian reference potential
    self.ref_sigma = None
    self.ref_mean = None
    self.neglog_gau_ref = None
    self.sum_neglog_gau_ref = 0.0

    # Conformation: loaded from the PDB, then converted from nm to Angstrom
    self.PDB_filename = PDB_filename
    self.data = data
    conformation = mdtraj.load_pdb(PDB_filename)
    self.conf = conformation
    self.conf.xyz = self.conf.xyz*10.0

    # The (reduced) free energy f = beta*F of this structure, scaled by lambda
    self.free_energy = lam*free_energy

    # Grid of allowed gamma^(-1/6) scaling parameters, evenly spaced in log space
    self.dloggamma = dloggamma
    self.gamma_min = gamma_min
    self.gamma_max = gamma_max
    log_gamma_grid = np.arange(np.log(self.gamma_min), np.log(self.gamma_max), self.dloggamma)
    self.allowed_gamma = np.exp(log_gamma_grid)

    # Flag to use log-normal distance errors log(d/d0)
    self.use_log_normal_noe = use_log_normal_noe

    # Karplus relation helper
    self.karplus = KarplusRelation()
def get_renumbered_topol_resnums(target):
    """Collect residue numbers from a target's renumbered topology files.

    Looks for ``topol-renumbered-implicit.pdb`` and
    ``topol-renumbered-explicit.pdb`` in the target's models directory.

    Returns
    -------
    dict
        Maps ``'implicit'`` / ``'explicit'`` to the list of ``resSeq``
        values of the corresponding file; missing files are left out.
    """
    models_target_dir = os.path.join(default_project_dirnames.models, target.id)

    renumbered_resnums = {}
    for topol_type in ['implicit', 'explicit']:
        topol_path = os.path.join(models_target_dir, 'topol-renumbered-{}.pdb'.format(topol_type))
        if os.path.exists(topol_path):
            residues = mdtraj.load_pdb(topol_path).top.residues
            renumbered_resnums[topol_type] = [resi.resSeq for resi in residues]
            logger.info('Will use renumbered residues from {} for target {}'.format(topol_path, target.id))
    return renumbered_resnums
def add_bonds_to_pdb(infile, topfile):
    """Rewrite ``infile`` with its first residue only, bonded as in ``topfile``.

    Nothing is done when the PDB holds exactly one residue. Otherwise the
    trajectory is restricted to the atoms of its first residue, bonds are
    taken from the Gromacs topology (shifted to 0-based indices) and the
    file is overwritten in place.
    """
    t = md.load_pdb(infile)
    if t.n_residues == 1:
        return

    n_first_residue_atoms = t.top.residue(0).n_atoms
    t.restrict_atoms(range(n_first_residue_atoms))

    # NOTE(review): relies on the private `_currentMoleculeType` attribute
    # of the parsed Gromacs topology.
    molecule_type = app.GromacsTopFile(topfile)._currentMoleculeType

    # Only the atom table is reused; the bonds come from the Gromacs file.
    atom_table, _ = t.top.to_dataframe()
    bond_pairs = np.array([(row[0], row[1]) for row in molecule_type.bonds], 'int') - 1

    t.top = md.Topology.from_dataframe(atom_table, bond_pairs)
    t.save(infile)
def haszn(i):
    """Record ``i`` in the module-level ``znpdbs`` list if it has a "ZN" atom.

    An atom matches when the text ``"ZN"`` appears in its string form.

    Parameters
    ----------
    i
        Path handed to ``md.load_pdb``.

    Returns
    -------
    None
        Always; files that cannot be loaded are skipped silently.
    """
    try:
        traj = md.load_pdb(i)
    except Exception:
        # Best-effort scan: skip unreadable files, but let Ctrl-C and
        # SystemExit through (a bare except would swallow them).
        return None

    if any("ZN" in str(atom) for atom in traj.topology.atoms):
        znpdbs.append(i)
def metal_scanner(file):
    """Tell whether a PDB file contains an atom named ``ZN``.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.

    Returns
    -------
    bool or None
        ``True`` / ``False`` for readable files (the value is also printed);
        ``None`` when the file cannot be loaded.
    """
    try:
        traj = md.load_pdb(file)
    except Exception:
        # Skip unreadable files, but let Ctrl-C and SystemExit through
        # (a bare except would swallow them).
        return None

    contains_metal = any(atom.name == 'ZN' for atom in traj.top.atoms)
    print(contains_metal)
    return contains_metal
def metal_scanner(file):
    """Tell whether a PDB file contains a ``ZN`` atom in a ``ZN`` residue.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.

    Returns
    -------
    list
        ``[flag, file]`` where ``flag`` is ``True`` / ``False`` for readable
        files (the list is also printed) and ``None`` when the file cannot
        be loaded.
    """
    try:
        traj = md.load_pdb(file)
    except Exception:
        # Skip unreadable files, but let Ctrl-C and SystemExit through
        # (a bare except would swallow them).
        return [None, file]

    has_zinc = any(
        atom.name == 'ZN' and atom.residue.name == 'ZN'
        for atom in traj.top.atoms)
    contains_metal = [has_zinc, file]
    print(contains_metal)
    return contains_metal
def file_reader(file):
    """Try to load one PDB file and update the shared success/error counters.

    Increments ``ok_file_count.value`` or ``error_file_count.value`` (both
    module-level objects, updated while holding the module-level ``lock``),
    then prints the file name and the current success count.

    Parameters
    ----------
    file
        Path handed to ``md.load_pdb``.
    """
    # No `global` statement needed: the counters are mutated, never rebound.
    try:
        md.load_pdb(file)
    except Exception:
        # Count any loading failure, but let Ctrl-C and SystemExit through
        # (a bare except would swallow them).
        with lock:
            error_file_count.value += 1
    else:
        with lock:
            ok_file_count.value += 1

    print(file)
    print(ok_file_count.value)
def __init__(self, PDB_filename, ref, dlogsigma=np.log(1.02),
             sigma_min=0.05, sigma_max=20.0, use_global_ref_sigma=True):
    """Initialize the Restraint class.

    :param str PDB_filename: A topology file (*.pdb)
    :param str ref: Reference potential.
    :param float dlogsigma: step of the sigma grid in log space, default = np.log(1.02)
    :param float sigma_min: default = 0.05
    :param float sigma_max: default = 20.0
    :param bool use_global_ref_sigma: default = True
    """
    # Store restraint info
    self.restraints = []   # a list of data container objects for each restraint (e.g. NMR_Chemicalshift_Ca())

    # Conformational Information
    self.PDB_filename = PDB_filename
    self.conf = mdtraj.load_pdb(PDB_filename)
    # Convert the coordinates from nm to Angstrom units
    self.conf.xyz = self.conf.xyz*10.0

    # used for exponential reference potential
    self.betas = None
    self.neglog_exp_ref = None
    self.sum_neglog_exp_ref = 0.0

    # used for Gaussian reference potential
    self.ref_sigma = None
    self.ref_mean = None
    self.neglog_gaussian_ref = None
    self.sum_neglog_gaussian_ref = 0.0
    self.use_global_ref_sigma = use_global_ref_sigma
    self.see = None

    # Storing the reference potential
    self.ref = ref

    # set sigma range (evenly spaced in log space)
    self.dlogsigma = dlogsigma
    self.sigma_min = sigma_min
    self.sigma_max = sigma_max
    self.allowed_sigma = np.exp(np.arange(np.log(self.sigma_min), np.log(self.sigma_max), self.dlogsigma))

    # Start from the middle of the grid. Floor division keeps the index an
    # int; "/" yields a float on Python 3, which cannot index an array.
    self.sigma_index = len(self.allowed_sigma) // 2
    self.sigma = self.allowed_sigma[self.sigma_index]
def __init__(self, PDB_filename, ref, dlogsigma=np.log(1.02),
             sigma_min=0.05, sigma_max=20.0, use_global_ref_sigma=True):
    """Initialize the Restraint class.

    INPUTS
    ------
    PDB_filename          A topology file (*.pdb)
    ref                   Reference potential.
    dlogsigma             Step of the allowed-sigma grid
    sigma_min             Lower bound of the allowed-sigma grid
    sigma_max             Upper bound (exclusive) of the allowed-sigma grid
    use_global_ref_sigma  Stored as ``self.use_global_ref_sigma``
    """
    # Store restraint info
    self.restraints = []   # a list of data container objects for each restraint (e.g. NMR_Chemicalshift_Ca())

    # Conformational Information
    self.PDB_filename = PDB_filename
    self.conf = mdtraj.load_pdb(PDB_filename)
    # Convert the coordinates from nm to Angstrom units
    self.conf.xyz = self.conf.xyz*10.0

    # used for exponential reference potential
    self.betas = None
    self.neglog_exp_ref = None
    self.sum_neglog_exp_ref = 0.0

    # used for Gaussian reference potential
    self.ref_sigma = None
    self.ref_mean = None
    self.neglog_gaussian_ref = None
    self.sum_neglog_gaussian_ref = 0.0
    self.use_global_ref_sigma = use_global_ref_sigma
    self.see = None

    # Storing the reference potential
    self.ref = ref

    # set sigma range
    self.dlogsigma = dlogsigma
    self.sigma_min = sigma_min
    self.sigma_max = sigma_max
    # The grid is linear with step `dlogsigma`; an earlier log-spaced
    # variant (np.exp over np.arange of the log bounds) was disabled here.
    self.allowed_sigma = np.arange(self.sigma_min, self.sigma_max, self.dlogsigma)

    # Start from the middle of the grid. Floor division keeps the index an
    # int; "/" yields a float on Python 3, which cannot index an array.
    self.sigma_index = len(self.allowed_sigma) // 2
    self.sigma = self.allowed_sigma[self.sigma_index]
def test_refine_implicit_md_short():
    """Run a very short implicit-solvent refinement and check its outputs.

    Verifies that the output files exist, that the log reports a finished,
    successful run at pH 8.0, that the metadata records the custom residue
    variant, and that residue position 49 of the refined model carries the
    'ASP50-HD2' atom.
    """
    with integrationtest_context(set_up_project_stage='clustered'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_implicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0*unit.femtosecond,
            nsteps_per_iteration=1,
            minimization_steps=1,
            loglevel='debug'
        )

        implicit_metadata_filepath = os.path.join(
            default_project_dirnames.models, targetid, 'refine_implicit_md-meta0.yaml'
        )
        implicit_model_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-refined.pdb.gz'
        )
        implicit_energies_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-energies.txt'
        )
        implicit_log_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-log.yaml'
        )

        assert all(map(
            os.path.exists,
            [implicit_model_filepath, implicit_energies_filepath, implicit_log_filepath]
        ))

        # safe_load: plain yaml.load needs an explicit Loader on current
        # PyYAML and can construct arbitrary objects on older versions.
        with open(implicit_log_filepath) as implicit_log_file:
            implicit_log = yaml.safe_load(implicit_log_file)
        assert implicit_log.get('finished') is True
        assert implicit_log.get('successful') is True
        assert implicit_log.get('ph') == 8.0

        assert os.path.exists(implicit_metadata_filepath)
        with open(implicit_metadata_filepath) as implicit_metadata_file:
            implicit_metadata = yaml.safe_load(implicit_metadata_file)
        assert implicit_metadata.get('refine_implicit_md').get('custom_residue_variants') == {
            'EGFR_HUMAN_D0': {49: 'ASH'}
        }

        implicit_model_traj = mdtraj.load_pdb(implicit_model_filepath)
        resis = list(implicit_model_traj.top.residues)
        resi49 = resis[49]
        resi49_atom_strings = [str(atom) for atom in resi49.atoms]
        assert 'ASP50-HD2' in resi49_atom_strings