def build(self, ff_name, water_name):
    """Create the cleaned starting PDB file for one force field / water pair.

    The output path comes from ``self.get_initial_pdb_filename``. If that
    file already exists the method returns immediately without touching it.
    Otherwise the structure is loaded from ``self.pdb_filename`` when it is
    set (else via ``pdbid=self.pdb_id``), repaired with PDBFixer, reduced to
    the chains in ``self.keep_chains`` and written to the output path.

    Parameters
    ----------
    ff_name
        Force-field name, forwarded to ``get_initial_pdb_filename``.
    water_name
        Water-model name, forwarded to ``get_initial_pdb_filename``.
    """
    out_filename = self.get_initial_pdb_filename(ff_name, water_name)
    utils.make_path(out_filename)

    if os.path.exists(out_filename):
        return

    if self.pdb_filename is not None:
        fixer = pdbfixer.PDBFixer(filename=self.pdb_filename)
    else:
        fixer = pdbfixer.PDBFixer(pdbid=self.pdb_id)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)  # positional argument is keepWater
    fixer.addMissingHydrogens(pH=self.pH)

    # Remove every chain index that is not listed in keep_chains.
    n_chains = len(list(fixer.topology.chains()))
    chains_to_remove = np.setdiff1d(np.arange(n_chains), self.keep_chains)
    fixer.removeChains(chains_to_remove)

    # Use a context manager so the file is flushed and closed deterministically.
    with open(out_filename, 'w') as out_file:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, out_file)
def fix_pdbfixer(self, input_filename, ph=7.0, add_H=False, file_type='cif'):
    """Read a PDBx/mmCIF or PDB file into PDBFixer and repair it.

    This method is called by add_missing_atoms.

    Missing residues are located, nonstandard residues are replaced and
    missing atoms are added. Heterogens are left in place and no solvent is
    added.

    Parameters
    ----------
    input_filename : str
        Path of the structure file to read.
    ph : float
        pH passed to ``addMissingHydrogens`` when ``add_H`` is set.
    add_H : bool
        When true, missing hydrogens are added.
    file_type : str
        ``'cif'`` to parse the file as PDBx/mmCIF, ``'pdb'`` to parse it as PDB.

    Returns
    -------
    pdbfixer.PDBFixer
        The fixer holding the repaired topology and positions.

    Raises
    ------
    ValueError
        If ``file_type`` is neither ``'cif'`` nor ``'pdb'``.
    """
    # Reject a bad file_type before importing anything or opening the file.
    if file_type not in ('cif', 'pdb'):
        raise ValueError(f"file_type must be cif or pdb not {file_type}")

    import pdbfixer

    with open(input_filename, 'r') as f:
        if file_type == 'cif':
            fixer = pdbfixer.PDBFixer(pdbxfile=f)
        else:
            fixer = pdbfixer.PDBFixer(pdbfile=f)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    if add_H:
        fixer.addMissingHydrogens(ph)
    return fixer
def __load_to_pdbfixer(self):
    """Round-trip the structure through a temporary PDB file into PDBFixer.

    The PDBFixer object is only built while ``self._pdbfixer`` is still
    ``None``; afterwards the cached object is reused. On every call the
    per-chain sequences are rebuilt from the cached topology and the
    missing atoms/residues/terminals are reset to empty dicts to avoid
    conflicts.
    """
    if self._pdbfixer is None:
        with tempfile.TemporaryFile(mode='r+') as tmp_pdb:
            app.PDBFile.writeFile(self.topology, self.positions, tmp_pdb,
                                  keepIds=True)
            tmp_pdb.seek(0)  # rewind so PDBFixer reads from the start
            loaded = pf.PDBFixer(pdbfile=tmp_pdb)
        self._pdbfixer = loaded

    # One Sequence per chain, holding the residue names in chain order.
    sequences = [
        Sequence(chain.id, [residue.name for residue in chain.residues()])
        for chain in self._pdbfixer.topology.chains()
    ]
    self._pdbfixer.sequences = self.sequences = sequences

    self._pdbfixer.missingAtoms = {}
    self._pdbfixer.missingResidues = {}
    self._pdbfixer.missingTerminals = {}

    logging.debug('Cached PDBFixer Data Structure')
def test_mutate_2():
    """Mutate ALA-57 to LEU and SER-56 to ALA in chain A of 1VII.

    After rebuilding atoms and hydrogens, the residues at positions 15 and
    16 of the topology must carry the new names, atom counts and atom names.
    """
    fixer = pdbfixer.PDBFixer(pdbid='1VII')
    fixer.applyMutations(["ALA-57-LEU", "SER-56-ALA"], "A")
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # Materialise the residue list once; both mutated residues index into it.
    residues = list(fixer.topology.residues())
    new_residue57 = residues[16]
    new_residue56 = residues[15]

    assert new_residue57.name == "LEU", "Name of mutated residue did not change correctly!"
    assert new_residue56.name == "ALA", "Name of mutated residue did not change correctly!"

    assert len(list(new_residue56.atoms())) == 10, "Should have 10 atoms in ALA 56"
    assert len(list(new_residue57.atoms())) == 19, "Should have 19 atoms in LEU 57"

    atom_names = {atom.name for atom in new_residue56.atoms()}
    desired_atom_names = {"N", "CA", "CB", "C", "O", "H", "HA", "HB1", "HB2", "HB3"}
    assert atom_names == desired_atom_names, "Atom Names did not match for ALA 56"

    atom_names = {atom.name for atom in new_residue57.atoms()}
    desired_atom_names = {
        "C", "N", "CA", "CB", "CG", "CD1", "CD2", "O", "H", "HA", "HB2",
        "HB3", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23", "HG",
    }
    assert atom_names == desired_atom_names, "Atom Names did not match for LEU 57"
def pdb2xyz(inputfile, outputPrefix, keepIntermediate=False):
    """pdb2xyz: Transform a pdb file to a goccs compatible xyz file.

    The output file ``<outputPrefix>.xyz`` holds the number of atoms on the
    first line, a comment line, and then one ``element x y z`` line per atom.

    The input is first passed through PDBFixer with all heterogens
    (including water) removed and written to ``<outputPrefix>_fixed.pdb``.
    If you set keepIntermediate to true, that PDBFixer output is kept;
    otherwise it is deleted once the xyz file has been written.
    """
    pdbfixedfilename = outputPrefix + "_fixed.pdb"
    xyzoutfilename = outputPrefix + ".xyz"

    fixer = pdbfixer.PDBFixer(inputfile)
    fixer.removeHeterogens(False)
    # Close the intermediate file before parsing it again so that all data
    # is guaranteed to be on disk.
    with open(pdbfixedfilename, 'w') as fixedhandle:
        PDBFile.writeFile(fixer.topology, fixer.positions, fixedhandle)

    # Use PDB.MMCIFParser() here instead in case the input is a cif file.
    parser = PDB.PDBParser()
    structure = parser.get_structure("input", pdbfixedfilename)
    atoms = list(structure.get_atoms())

    with open(xyzoutfilename, "w") as outputhandle:
        # XYZ header: atom count on its own line, followed by a comment line.
        outputhandle.write("%d\nempty line\n" % len(atoms))
        for atom in atoms:
            coords = atom.get_coord()
            outputhandle.write("%s %.3f %.3f %.3f\n" %
                               (atom.element, coords[0], coords[1], coords[2]))

    if not keepIntermediate:
        os.remove(pdbfixedfilename)
def fix(pdbid, padding=PADDING):
    """Repair a PDB entry, keep its first chain and build a solvated box.

    Two files are written to the current directory:
    ``<pdbid>_fixed.pdb`` (repaired first chain with hydrogens added at
    pH 7.0) and ``<pdbid>_box.pdb`` (the same structure solvated with the
    amber99sbildn / tip3p force field).

    Parameters
    ----------
    pdbid : str
        Identifier passed to ``PDBFixer(pdbid=...)``; also used as the
        output file prefix.
    padding
        Padding forwarded to ``Modeller.addSolvent``.
    """
    fixer = pdbfixer.PDBFixer(pdbid=pdbid)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)  # positional argument is keepWater
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # Keep only chain index 0.
    numChains = len(list(fixer.topology.chains()))
    fixer.removeChains(range(1, numChains))

    # PDBFile.writeFile emits text, so the file must be opened in text mode.
    with open("%s_fixed.pdb" % pdbid, 'w') as file_handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, file_handle)

    ff_name = "amber99sbildn"
    water_name = 'tip3p'
    which_forcefield = "%s.xml" % ff_name
    which_water = '%s.xml' % water_name

    ff = app.ForceField(which_forcefield, which_water)

    modeller = app.Modeller(fixer.topology, fixer.positions)
    modeller.addSolvent(ff, padding=padding)

    with open("./%s_box.pdb" % pdbid, 'w') as box_handle:
        app.PDBFile.writeFile(modeller.topology, modeller.positions, box_handle)
def add_hydrogens_to_mol(mol):
    """
    Add hydrogens to a molecule object

    The molecule is serialised to a PDB block, passed through PDBFixer
    (missing residues/atoms located, missing atoms added, hydrogens added
    at pH 7.4) and parsed back without sanitisation and without removing
    hydrogens.

    TODO (LESWING) see if there are more flags to add here for default

    :param mol: Rdkit Mol
    :return: Rdkit Mol
    :raises MoleculeLoadException: if a ValueError occurs along the way
    """
    try:
        pdbblock = Chem.MolToPDBBlock(mol)
        pdb_stringio = StringIO()
        pdb_stringio.write(pdbblock)
        pdb_stringio.seek(0)

        import pdbfixer
        # Import the ``app`` submodule explicitly: a bare ``import simtk``
        # does not guarantee that ``simtk.openmm.app`` has been loaded.
        try:
            from openmm import app as openmm_app
        except ImportError:
            from simtk.openmm import app as openmm_app

        fixer = pdbfixer.PDBFixer(pdbfile=pdb_stringio)
        fixer.findMissingResidues()
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.4)

        hydrogenated_io = StringIO()
        openmm_app.PDBFile.writeFile(fixer.topology, fixer.positions,
                                     hydrogenated_io)
        hydrogenated_io.seek(0)
        return Chem.MolFromPDBBlock(
            hydrogenated_io.read(), sanitize=False, removeHs=False)
    except ValueError as e:
        logging.warning("Unable to add hydrogens %s", e)
        raise MoleculeLoadException(e)
def process_pdb(pdb_file):
    """Repair one PDB file with PDBFixer and re-save it through Biopython.

    The output path is ``args.output`` followed by ``pdb_file`` with the
    ``args.input`` prefix removed. The file is first written by PDBFixer
    (missing atoms added, hydrogens added at pH 7.4) and then read back with
    Biopython and saved again through ``WaterSelect``.

    Parameters
    ----------
    pdb_file
        Path of the input PDB file; converted with ``str()`` before use.
    """
    # use the pdbfixer utility to make sure the pdbfile is properly represented
    out_file = args.output + str(pdb_file).replace(args.input, "")
    out_dir = out_file.replace("/com.pdb", "")
    # exist_ok avoids the check-then-create race when several files that
    # share a directory are processed concurrently.
    os.makedirs(out_dir, exist_ok=True)

    with open(pdb_file) as f:
        fixer = pdbfixer.PDBFixer(pdbfile=f)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.4)
    with open(out_file, 'w') as handle:
        simtk.openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions,
                                           handle)

    # now read back using biopython :) and apply the water/hydrogen filters
    parser = PDBParser(QUIET=True, structure_builder=SloppyStructureBuilder())
    structure = parser.get_structure('', out_file)
    io = PDBIO()
    io.set_structure(structure)
    io.save(out_file, WaterSelect())
def from_fixPDB(cls, filename=None, pdbfile=None, pdbxfile=None, url=None, pdbid=None, **kwargs):
    """Build an instance from a structure that is first repaired with pdbfixer.

    Exactly the arguments given here (``filename``, ``pdbfile``, ``pdbxfile``,
    ``url``, ``pdbid``) are forwarded to ``pdbfixer.PDBFixer``. Gaps at the
    ends of a chain are not rebuilt; nonstandard residues are replaced,
    heterogens (including water) are removed, and missing atoms and
    hydrogens (pH 7.0) are added.

    The repaired structure is then flattened into a pandas table with one
    row per atom, indexed by atom serial, and ``cls(table, **kwargs)`` is
    returned.
    """
    import pdbfixer

    fixer = pdbfixer.PDBFixer(filename=filename, pdbfile=pdbfile,
                              pdbxfile=pdbxfile, url=url, pdbid=pdbid)
    fixer.findMissingResidues()

    # Discard missing-residue entries that sit at the start (index 0) or at
    # the end (index == number of residues) of their chain.
    chains = list(fixer.topology.chains())
    for key in list(fixer.missingResidues.keys()):
        chain_index, residue_index = key[0], key[1]
        owner = chains[chain_index]
        if residue_index == 0 or residue_index == len(list(owner.residues())):
            del fixer.missingResidues[key]

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(keepWater=False)
    fixer.findMissingAtoms()
    # Warning: importing 'simtk.openmm' is deprecated. Import 'openmm' instead.
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # Parse the openmm-style structure into a table that contains all the
    # information of a pdb file.
    cols = [
        'recname', 'serial', 'name', 'altLoc', 'resName', 'chainID',
        'resSeq', 'iCode', 'x', 'y', 'z', 'occupancy', 'tempFactor',
        'element', 'charge'
    ]
    angstrom = pdbfixer.pdbfixer.unit.angstrom
    rows = []
    for atom, position in zip(fixer.topology.atoms(), fixer.positions):
        residue = atom.residue
        xyz = position.value_in_unit(angstrom)
        rows.append({
            'recname': 'ATOM',
            'serial': int(atom.id),
            'name': atom.name,
            'altLoc': '',
            'resName': residue.name,
            'chainID': residue.chain.id,
            'resSeq': int(residue.id),
            'iCode': '',
            'x': xyz[0],
            'y': xyz[1],
            'z': xyz[2],
            'occupancy': 0,
            'tempFactor': 0,
            'element': atom.element.symbol,
            'charge': '',
        })

    atom_list = pandas.DataFrame(rows)[cols]
    atom_list.index = atom_list['serial']
    return cls(atom_list, **kwargs)
def remove_chain_indices_and_verify(chain_indices_to_remove, expected_chain_ids_remaining):
    """Remove chains by index and check which chain ids are left.

    The structure text is read from ``file_content``, which is not a
    parameter of this function and must be available from the surrounding
    scope.
    """
    # Load the structure from the in-memory PDB text.
    pdb_stream = StringIO(file_content)
    fixer = pdbfixer.PDBFixer(pdbfile=pdb_stream)

    # Drop the requested chains.
    fixer.removeChains(chainIndices=chain_indices_to_remove)

    # The ids that survive must equal the expected list.
    remaining_ids = []
    for chain in fixer.topology.chains():
        remaining_ids.append(chain.id)
    assert_list_equal(remaining_ids, expected_chain_ids_remaining)
def remove_chains_and_verify(file_content, expected_chain_ids_remaining, **kws):
    """Remove chains from a PDB text and check which chain ids are left.

    ``kws`` is forwarded unchanged to ``PDBFixer.removeChains``.
    """
    # Load the structure from the in-memory PDB text.
    pdb_stream = StringIO(file_content)
    fixer = pdbfixer.PDBFixer(pdbfile=pdb_stream)

    # Drop whatever chains the keyword arguments select.
    fixer.removeChains(**kws)

    # The ids that survive must equal the expected list.
    remaining_ids = []
    for chain in fixer.topology.chains():
        remaining_ids.append(chain.id)
    assert expected_chain_ids_remaining == remaining_ids
def remove_chain_ids_and_verify(pdbid, chain_ids_to_remove, expected_chain_ids_remaining):
    """Remove chains by id from a PDB entry and check which ids are left.

    The remaining ids are read from ``fixer.structureChains`` and compared
    with ``assert_items_equal``.
    """
    # Load the entry identified by pdbid.
    fixer = pdbfixer.PDBFixer(pdbid=pdbid)

    # Drop the requested chains.
    fixer.removeChains(chainIds=chain_ids_to_remove)

    # The ids that survive must match the expected ones.
    remaining_ids = []
    for chain in fixer.structureChains:
        remaining_ids.append(chain.chain_id)
    assert_items_equal(remaining_ids, expected_chain_ids_remaining)
def fix(pdbid, missing_residues, padding=PADDING, mutations=None):
    """Repair ``<pdbid>.pdb``, fix up HIS 119 and build a solvated box.

    Files written under ``./pdbs/``:

    * ``<pdbid>_fixed0.pdb`` - PDBFixer output (first chain only).
    * ``<pdbid>_fixed1.pdb`` - the same structure after ``sort_atoms``.
    * ``<pdbid>_fixed.pdb``  - ``_fixed1`` without the HE1/HE2/HD1/HD2
      hydrogens of HIS A 119.
    * ``<pdbid>_box.pdb``    - re-protonated (residue index 118 as HIE) and
      solvated with amber99sbildn / tip3p.

    Parameters
    ----------
    pdbid : str
        Prefix of the input file ``<pdbid>.pdb`` and of all output files.
    missing_residues
        Assigned directly to ``fixer.missingResidues``.
    padding
        Padding forwarded to ``Modeller.addSolvent``.
    mutations : optional
        When given, ``mutations[0]`` and ``mutations[1]`` are passed to
        ``PDBFixer.applyMutations``.
    """
    fixer = pdbfixer.PDBFixer(filename="%s.pdb" % pdbid)

    if mutations is not None:
        fixer.applyMutations(mutations[0], mutations[1])

    fixer.missingResidues = missing_residues
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)  # positional argument is keepWater
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # Keep only chain index 0.
    numChains = len(list(fixer.topology.chains()))
    fixer.removeChains(range(1, numChains))

    # PDBFile.writeFile emits text, so the file must be opened in text mode.
    filename0 = "./pdbs/%s_fixed0.pdb" % pdbid
    with open(filename0, 'w') as file_handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, file_handle)

    traj0 = md.load(filename0)
    traj = sort_atoms(traj0)
    filename1 = "./pdbs/%s_fixed1.pdb" % pdbid
    traj.save(filename1)

    filename = "./pdbs/%s_fixed.pdb" % pdbid

    # Need to sync the protonation state of one histidine: drop its ring
    # hydrogens here; they are added back below with the HIE variant.
    # Done in Python instead of a shell grep pipeline so that pdbid never
    # reaches a shell and no external tools are required.
    his_hydrogens = ('HE1 HIS A 119', 'HE2 HIS A 119',
                     'HD1 HIS A 119', 'HD2 HIS A 119')
    with open(filename1) as src, open(filename, 'w') as dst:
        for line in src:
            if not any(tag in line for tag in his_hydrogens):
                dst.write(line)

    ff_name = "amber99sbildn"
    water_name = 'tip3p'
    which_forcefield = "%s.xml" % ff_name
    which_water = '%s.xml' % water_name

    ff = app.ForceField(which_forcefield, which_water)

    pdb = app.PDBFile(filename)

    modeller = app.Modeller(pdb.topology, pdb.positions)

    # One variant slot per residue (161 hard-coded); only index 118 is forced.
    variants = [None] * 161
    variants[118] = "HIE"

    modeller.addHydrogens(ff, variants=variants)
    modeller.addSolvent(ff, padding=padding)

    with open("./pdbs/%s_box.pdb" % pdbid, 'w') as box_handle:
        app.PDBFile.writeFile(modeller.topology, modeller.positions, box_handle)
def pdbfixerTransform(filename, replace_nonstandard_residues, add_missing_residues, add_missing_atoms):
    """
    Adds missing residues and/or missing atoms to a PDB file.

    Parameters
    ----------
    filename : str
        Name of the input PDB file.

    replace_nonstandard_residues : bool
        Whether to replace nonstandard residues with their standard equivalents.

    add_missing_residues : bool
        Whether to add missing residues.

    add_missing_atoms : bool
        Whether to add missing atoms.

    Returns
    -------
    filename_output : str
        Absolute path to the modified file. When all three flags are false
        nothing is modified and the absolute path of ``filename`` is returned.
    """
    if not (replace_nonstandard_residues or add_missing_atoms
            or add_missing_residues):
        return _os.path.abspath(filename)

    fix = _pdbfix.PDBFixer(filename=filename)

    if replace_nonstandard_residues:
        fix.findNonstandardResidues()
        fix.replaceNonstandardResidues()

    # PDBFixer stores its "missing" bookkeeping in dicts, so an empty dict
    # is the way to say that there is nothing to add.
    if add_missing_residues:
        fix.findMissingResidues()
    else:
        fix.missingResidues = {}

    if add_missing_atoms:
        fix.findMissingAtoms()
    else:
        fix.missingAtoms = {}
        fix.missingTerminals = {}

    fix.addMissingAtoms()

    filename_output = _os.path.splitext(filename)[0] + "_pdbfixer.pdb"
    # Close the file before handing its name on, so the follow-up step
    # reads fully flushed content.
    with open(filename_output, "w") as file_output:
        _PDBFile.writeFile(fix.topology, fix.positions, file_output)

    return fixPDBFixerPDB(filename_output, filename,
                          replace_nonstandard_residues,
                          add_missing_residues, add_missing_atoms,
                          filename_output)
def mutate(pdb_path, mut_region=None, chain_id=None):
    """
    Make a mutant protein easy.

    The mutated structure is written next to the input file as
    ``<name>_chain<chain_id>_<first mutation>.pdb``. Invalid input
    (including a missing ``mut_region`` or ``chain_id``) is reported by
    printing the error; nothing is raised for a ``ValueError``.

    Parameters
    ----------

    pdb_path: Give your pdb whole path to this parameter

    mut_region : list of strings
        Each string must include the resName (original), index,
        and resName (target).  For example, ALA-133-GLY will mutate
        alanine 133 to glycine.

    chain_id : str
        Chain ID to apply mutation.

    Example
    ----------

    mutate('C:/Users/HIbrahim/Desktop/MolDynAnalyze/test/last.pdb', mut_region=['ASP-306-ARG'], chain_id='A')

    """
    try:
        # The defaults are None, which would otherwise fail below with an
        # uncaught TypeError/IndexError instead of the usual error report.
        if not mut_region:
            raise ValueError("mut_region must be a non-empty list such as ['ALA-133-GLY']")
        if chain_id is None:
            raise ValueError("chain_id must be given, for example 'A'")

        pdb_name = os.path.basename(pdb_path).split('.')[0]
        pdb_directory = os.path.dirname(pdb_path)

        mut_file_name = pdb_name + '_chain' + chain_id + '_' + str(
            mut_region[0]) + '.pdb'
        mut_file_path = os.path.join(pdb_directory, mut_file_name)

        fixer = pdbfixer.PDBFixer(pdb_path)
        fixer.applyMutations(mut_region, chain_id)
        fixer.findMissingResidues()
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()

        with open(mut_file_path, 'w') as w_file:
            app.PDBFile.writeFile(fixer.topology, fixer.positions, w_file,
                                  keepIds=True)

    except ValueError as error:
        print(error)
        print('Please Check Your Input Parameters !!')
def _step_3_pdbfixer(first_model, temp3):
    """Blank all altloc flags, save the model to ``temp3`` and run PDBFixer.

    Returns the pair ``(temp3, fixer)`` where ``fixer`` was loaded from
    ``temp3.name`` and has had nonstandard residues replaced and missing
    atoms and hydrogens (pH 7.0) added.
    """
    # Walk chain -> residue -> atom and reset every alternate-location flag.
    every_atom = (atom
                  for chain in first_model
                  for residue in chain
                  for atom in residue)
    for atom in every_atom:
        atom.set_altloc(" ")

    PDBIO.set_structure(first_model)
    PDBIO.save(temp3)
    temp3.flush()  # make sure the content is on disk before re-reading by name

    # Use PDBFixer to fix common PDB errors
    repaired = pdbfixer.PDBFixer(temp3.name)
    repaired.findMissingResidues()
    repaired.findNonstandardResidues()
    repaired.replaceNonstandardResidues()
    repaired.findMissingAtoms()
    repaired.addMissingAtoms()
    repaired.addMissingHydrogens(7.0)
    return temp3, repaired
def test_mutate_1():
    """Mutate ALA-57 to GLY in chain A of 1VII and check the rebuilt residue.

    The result is also written to a temporary PDB file and parsed back to
    make sure the output is readable.
    """
    fixer = pdbfixer.PDBFixer(pdbid='1VII')
    fixer.applyMutations(["ALA-57-GLY"], "A")
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # PDBFile.writeFile emits text, so the temp file needs a text mode, and
    # it must be flushed before it is re-opened by name.
    with tempfile.NamedTemporaryFile(mode='w+') as temp_pdb:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, temp_pdb)
        temp_pdb.flush()
        app.PDBFile(temp_pdb.name)

    new_residue57 = list(fixer.topology.residues())[16]
    assert new_residue57.name == "GLY", "Name of mutated residue did not change correctly!"
    assert len(list(new_residue57.atoms())) == 7, "Should have 7 atoms in GLY 57"

    atom_names = {atom.name for atom in new_residue57.atoms()}
    desired_atom_names = {"N", "CA", "C", "O", "H", "HA3", "HA2"}
    assert atom_names == desired_atom_names, "Atom Names did not match for GLY 57"
def fixPDB(pdb_file):
    """Repair a pdb file with the pdbfixer library and return the fixer.

    Gaps at the ends of a chain are not rebuilt; nonstandard residues are
    replaced, heterogens (including water) are removed, and missing atoms
    and hydrogens (pH 7.0) are added. The input is a pdb file location, the
    output is the fixer object.
    Manual on https://raw.githubusercontent.com/pandegroup/pdbfixer/master/Manual.html
    """
    fixer = pdbfixer.PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()

    # Discard missing-residue entries that sit at the start (index 0) or at
    # the end (index == number of residues) of their chain.
    chains = list(fixer.topology.chains())
    for key in list(fixer.missingResidues.keys()):
        chain_index, residue_index = key[0], key[1]
        owner = chains[chain_index]
        if residue_index == 0 or residue_index == len(list(owner.residues())):
            del fixer.missingResidues[key]

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(keepWater=False)
    # Only run when the SEQ section in PDB file contains info of sequence
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    return fixer
def fixPDB(pdb, pdbname, add_hyds=True):
    """
    prepares the PDB structure for simulation/minimization using the openMM PDBfixer

    Nonstandard residues are replaced, missing atoms are added and
    heterogens other than water are removed (crystallographic water is
    kept). The result is written to ``pdbname``.

    Parameters
    ----------
    pdb : str
        Path of the input PDB file.
    pdbname : str
        Path of the PDB file to write.
    add_hyds : bool, optional
        When true (the default), missing hydrogens are added at pH 7.0.
    """
    fixer = pdbfixer.PDBFixer(filename=pdb)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    # keepWater is a boolean flag: keep the crystallographic water.
    fixer.removeHeterogens(keepWater=True)
    if add_hyds:
        fixer.addMissingHydrogens(7.0)  # only if we want protons!

    with open(pdbname, 'w') as outfile:
        PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
def add_hydrogens_to_mol(mol):
    """Add hydrogens to an RDKit molecule instance.

    The molecule is serialised to a PDB block, passed through PDBFixer
    (missing atoms added, hydrogens added at pH 7.4) and parsed back
    without sanitisation and without removing hydrogens.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule instance.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule instance with hydrogens added. For failures in adding
        hydrogens, the original RDKit molecule instance will be returned.
    """
    try:
        pdbblock = Chem.MolToPDBBlock(mol)
        pdb_stringio = StringIO()
        pdb_stringio.write(pdbblock)
        pdb_stringio.seek(0)

        fixer = pdbfixer.PDBFixer(pdbfile=pdb_stringio)
        fixer.findMissingResidues()
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.4)

        hydrogenated_io = StringIO()
        simtk.openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions,
                                           hydrogenated_io)
        hydrogenated_io.seek(0)
        hydrogenated = Chem.MolFromPDBBlock(hydrogenated_io.read(),
                                            sanitize=False, removeHs=False)
        pdb_stringio.close()
        hydrogenated_io.close()

        # A parse that yields None is a failure too: keep the original
        # molecule, as the Returns section promises.
        if hydrogenated is None:
            warnings.warn('Failed to add hydrogens to the molecule.')
        else:
            mol = hydrogenated
    except ValueError:
        warnings.warn('Failed to add hydrogens to the molecule.')
    return mol
def fix_pdb(pdbfile, alterations_info):
    """Run pdbfixer over the contents of a PDB file and return PDB text.

    Steps, in order:
      1) Nonstandard residues are replaced.
      2) Heterogens (non protein residues), water included, are removed.
      3) Missing residues and missing atoms within existing residues are added.
      4) Hydrogens are added assuming pH=7.0.
      5) The output is written with KeepIds true, so the existing chain and
         residue identifiers must be kept. This will fail for some files in
         wider PDB that have invalid IDs.

    Args:
      pdbfile: Input PDB file handle.
      alterations_info: A dict that will store details of changes made.

    Returns:
      A PDB string representing the fixed structure.
    """
    fixer = pdbfixer.PDBFixer(pdbfile=pdbfile)

    # Record what was found before each repair step is applied.
    fixer.findNonstandardResidues()
    alterations_info['nonstandard_residues'] = fixer.nonstandardResidues
    fixer.replaceNonstandardResidues()

    _remove_heterogens(fixer, alterations_info, keep_water=False)

    fixer.findMissingResidues()
    alterations_info['missing_residues'] = fixer.missingResidues

    fixer.findMissingAtoms()
    alterations_info.update(
        missing_heavy_atoms=fixer.missingAtoms,
        missing_terminals=fixer.missingTerminals,
    )

    fixer.addMissingAtoms(seed=0)
    fixer.addMissingHydrogens()

    with io.StringIO() as pdb_buffer:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, pdb_buffer,
                              keepIds=True)
        return pdb_buffer.getvalue()
import simtk.openmm.app as app
import pdbfixer


def _fix_and_write(fixer, out_path):
    """Repair ``fixer`` in place, keep its first chain and write it as PDB."""
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)  # positional argument is keepWater
    fixer.addMissingHydrogens(7.0)

    # Keep only chain index 0.
    numChains = len(list(fixer.topology.chains()))
    fixer.removeChains(range(1, numChains))

    # Context manager so the file is flushed and closed deterministically.
    with open(out_path, 'w') as out_file:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, out_file)


mutation_string = "GLY-112-ALA"

# Mutant structure.
fixer = pdbfixer.PDBFixer(pdbid='2LCB')
# NOTE(review): applyMutations is called with a chain id in other code
# ("A"); confirm this one-argument call works with the installed PDBFixer.
fixer.applyMutations([mutation_string])
_fix_and_write(fixer, "./pdb_fixed/2LCB_%s.pdb" % mutation_string)

# Unmodified reference structure, processed the same way.
fixer = pdbfixer.PDBFixer(pdbid='2LCB')
_fix_and_write(fixer, "./pdb_fixed/2LCB.pdb")
from simtk.openmm import app
import simtk.openmm as mm
from simtk import unit as u
import pdbfixer

# Simulation settings; not used by the structure clean-up below.
padding = 1.0 * u.nanometers
cutoff = 0.95 * u.nanometers

ff = app.ForceField('amber99sbnmr.xml', 'tip3p-fb.xml')

temperature = 293.
pressure = 1.0 * u.atmospheres

# Repair ./1am7.pdb, drop chain indices 1-5 and write the result.
fixer = pdbfixer.PDBFixer("./1am7.pdb")

fixer.findMissingResidues()
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.removeHeterogens(True)  # positional argument is keepWater
fixer.addMissingHydrogens()
fixer.removeChains([1, 2, 3, 4, 5])

# Context manager so the file is flushed and closed deterministically.
with open("1am7_fixed.pdb", 'w') as fixed_file:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, fixed_file)
def create_mutation_file(mutation, pdb_dir, pdb_output_dir):
    """Build the mutant sequence file and mutant PDB for one point mutation.

    Files written to ``pdb_output_dir`` (``<base>`` is
    ``<pdb_id>_<wt>-<res_index>-<mutant>``):

    * ``<base>.txt``   - lower-cased wild-type sequence with the mutant
      residue inserted as given.
    * ``<base>.pdb``   - Scwrl4 output, then repaired in place with
      PDBFixer (missing atoms, hydrogens at pH 7.0).
    * ``<base>.scwrl`` - captured standard output of Scwrl4.

    Parameters
    ----------
    mutation : tuple
        ``(pdb_id, substitutions, ddg)``; only the first substitution
        ``(wt, res_index, mutant)`` is used.
    pdb_dir : str
        Directory containing ``<pdb_id>.pdb``.
    pdb_output_dir : str
        Directory receiving the output files.
    """
    pdb_id, substitutions, _ddg = mutation
    wt, res_index, mutant = substitutions[0]

    print("Processing: %s %s-%s-%s" % (pdb_id, wt, res_index, mutant))

    pdb_path = os.path.join(pdb_dir, pdb_id + ".pdb")

    # Get the PDB sequence
    pdb_parser = Bio.PDB.PDBParser()
    structure = pdb_parser.get_structure(pdb_id, pdb_path)
    ppb = Bio.PDB.PPBuilder()
    pps = list(ppb.build_peptides(structure))
    pdb_initial_res_index = pps[0][0].get_id()[1]
    assert (len(pps) == 1)
    wt_seq = pps[0].get_sequence()
    # Position of the mutated residue within the peptide sequence.
    seq_offset = res_index - pdb_initial_res_index
    assert (wt_seq[seq_offset] == wt)

    # Construct the mutant sequence
    mutant_seq = list(wt_seq.lower())
    mutant_seq[seq_offset] = mutant
    mutant_seq = "".join(mutant_seq)

    # Save the output in a file
    output_basename = "%s_%s-%d-%s" % (pdb_id, wt, res_index, mutant)
    sequence_output_path = os.path.join(pdb_output_dir,
                                        output_basename + ".txt")
    with open(sequence_output_path, "w") as sequence_output_file:
        sequence_output_file.write(mutant_seq)

    # Construct the mutant PDB file
    pdb_output_path = os.path.join(pdb_output_dir, output_basename + ".pdb")
    p = subprocess.Popen([
        "Scwrl4", "-i", pdb_path, "-s", sequence_output_path, "-o",
        pdb_output_path
    ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    if p.returncode != 0:
        print("Error running Scwrl4 on:", pdb_output_path)
        print(err)

    # communicate() returns bytes for these pipes, so write in binary mode.
    scwrl_output_path = os.path.join(pdb_output_dir,
                                     output_basename + ".scwrl")
    with open(scwrl_output_path, "wb") as scwrl_output_file:
        scwrl_output_file.write(out)

    # Fix the output structure
    fixer = pdbfixer.PDBFixer(filename=pdb_output_path)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    with open(pdb_output_path, "w") as pdb_output_file:
        simtk.openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions,
                                           pdb_output_file, keepIds=True)
def apply_pdbfixer(mol, add_missing=True, hydrogenate=True, pH=7.4, remove_heterogens=True, is_protein=True):
    """
    Apply PDBFixer to a molecule to try to clean it up.

    Parameters
    ----------
    mol: Rdkit Mol
      Molecule to clean up.
    add_missing: bool, optional
      If true, add in missing residues and atoms
    hydrogenate: bool, optional
      If true, add hydrogens at specified pH
    pH: float, optional
      The pH at which hydrogens will be added if `hydrogenate==True`. Set to 7.4 by default.
    remove_heterogens: bool, optional
      Often times, PDB files come with extra waters and salts attached.
      If this field is set, remove these heterogens.
    is_protein: bool, optional
      If false, then don't remove heterogens (since this molecule is
      itself a heterogen).

    Returns
    -------
    Rdkit Mol

    Raises
    ------
    MoleculeLoadException
      If a ValueError occurs while converting or fixing the molecule.

    Note
    ----
    This function requires RDKit and PDBFixer to be installed.
    """
    try:
        from rdkit import Chem
        pdbblock = Chem.MolToPDBBlock(mol)
        pdb_stringio = StringIO()
        pdb_stringio.write(pdbblock)
        pdb_stringio.seek(0)

        import pdbfixer
        # Import the ``app`` submodule explicitly: a bare ``import simtk``
        # does not guarantee that ``simtk.openmm.app`` has been loaded.
        try:
            from openmm import app as openmm_app
        except ImportError:
            from simtk.openmm import app as openmm_app

        fixer = pdbfixer.PDBFixer(pdbfile=pdb_stringio)
        if add_missing:
            fixer.findMissingResidues()
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()
        if hydrogenate:
            fixer.addMissingHydrogens(pH)
        if is_protein and remove_heterogens:
            # False here specifies that water is to be removed
            fixer.removeHeterogens(False)

        hydrogenated_io = StringIO()
        openmm_app.PDBFile.writeFile(fixer.topology, fixer.positions,
                                     hydrogenated_io)
        hydrogenated_io.seek(0)
        return Chem.MolFromPDBBlock(hydrogenated_io.read(),
                                    sanitize=False,
                                    removeHs=False)
    except ValueError as e:
        logger.warning("Unable to add hydrogens %s", e)
        raise MoleculeLoadException(e)
def test_mutate_5_fails():
    """Request a mutation at residue 1000 of chain A in 1VII.

    Presumably expected to raise; the expected-failure marker is not part
    of this function - confirm at the definition site.
    """
    requested = ["ALA-1000-GLY", "SER-56-ALA"]
    structure = pdbfixer.PDBFixer(pdbid='1VII')
    structure.applyMutations(requested, "A")
# Truthy value enables the protonation step in writeMfjOutput.
protonate=1
pH=10.0 # fixed pH used to charge the protein


def writeMfjOutput(protein,mfjfilename):
    """Prepare the mobcal input file from a prepared openmm system.

    The visible part loads the structure with PDBFixer, removes all
    heterogens (water included), reports missing residues and atoms, adds
    missing atoms when any were found and, when the module-level
    ``protonate`` flag is truthy, adds hydrogens at the module-level ``pH``.

    NOTE(review): ``protein`` and ``mfjfilename`` are not used in the part
    of the function shown here, and ``inputfilename`` is read from the
    enclosing scope rather than passed in - confirm against the rest of
    the file.
    """
    print("Reading file: "+inputfilename)
    fixer=pdbfixer.PDBFixer(inputfilename)
    fixer.removeHeterogens(False)  # False: water is removed as well
    fixer.findMissingResidues()
    print("Missing residues: ")
    print(fixer.missingResidues)
    fixer.findMissingAtoms()
    print("Missing atoms: ")
    print(fixer.missingAtoms)
    # Atoms are only rebuilt when findMissingAtoms reported something.
    if(len(fixer.missingAtoms)):
        print("adding missing atoms")
        fixer.addMissingAtoms()
    if protonate:
        print("Protonating at pH: "+str(pH))
        fixer.addMissingHydrogens(pH)
def test_mutate_4_fails():
    """Request a mutation to the residue name "WTF" in chain A of 1VII.

    Presumably expected to raise; the expected-failure marker is not part
    of this function - confirm at the definition site.
    """
    requested = ["ALA-57-WTF", "SER-56-ALA"]
    structure = pdbfixer.PDBFixer(pdbid='1VII')
    structure.applyMutations(requested, "A")
import pdbfixer
from simtk.openmm.app import PDBFile

# Add missing atoms to each complex and write it to "<name>_fixed.pdb".
for complex_name in ('6nb8_2ajf_complex', '6nb8_2ghv_complex'):
    fixer = pdbfixer.PDBFixer('%s.pdb' % complex_name)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    # Context manager so each file is flushed and closed deterministically.
    with open('%s_fixed.pdb' % complex_name, 'w') as fixed_file:
        PDBFile.writeFile(fixer.topology, fixer.positions, fixed_file)
def _get_fixer(mol):
    """Return a PDBFixer loaded from ``mol`` written out in PDB format.

    The molecule is written to a file inside a private temporary directory
    instead of the fixed path ``/tmp/tmp.pdb``, so concurrent callers cannot
    overwrite each other's file and no fixed ``/tmp`` location is required.
    The directory is removed before returning.
    """
    import os
    import shutil
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    try:
        tmp_path = os.path.join(tmp_dir, 'tmp.pdb')
        mol.write(tmp_path, format='pdb')
        fixer = pf.PDBFixer(tmp_path)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return fixer