def soup_from_restart_files(psf, in_coor, in_vel='', skip_solvent=False):
    """
    Builds a Soup from a .psf file and fills it from restart files.

    The atom list comes from soup_from_psf(psf). Positions are copied
    atom-by-atom from in_coor, and, when in_vel is given, velocities are
    copied from the position field of the atoms read from in_vel.

    skip_solvent is accepted but is not used by this function.
    """
    soup = soup_from_psf(psf)

    # zip() pairs atoms by order and stops at the shorter list
    position_soup = pdbatoms.Soup(in_coor)
    for target, source in zip(soup.atoms(), position_soup.atoms()):
        xyz = source.pos
        v3.set_vector(target.pos, xyz[0], xyz[1], xyz[2])

    if not in_vel:
        return soup

    # the velocity file is parsed like a coordinate file, so each
    # velocity vector is found in the atom's pos attribute
    velocity_soup = pdbatoms.Soup(in_vel)
    for target, source in zip(soup.atoms(), velocity_soup.atoms()):
        xyz = source.pos
        v3.set_vector(target.vel, xyz[0], xyz[1], xyz[2])

    return soup
def get_pdb_transform(pdb, center_res, top_res):
    """
    Returns a transformation matrix that centers pdb to center_res on
    the z-axis and moves top_res above center_res on the y-axis.

    The structure is loaded into a scratch Soup that is transformed
    step by step; only the combined transform is returned.
    """
    scratch = pdbatoms.Soup(pdb)

    # 1. move the center of all atoms to the origin
    all_atoms = scratch.atoms()
    centering = v3.translation(-pdbatoms.get_center(all_atoms))
    scratch.transform(centering)
    combined = centering

    # 2. rotate so that the CA of center_res lies along the view (z) axis
    z_axis = v3.vector(0, 0, 1)
    center_ca = find_ca_of_resname(scratch.atoms(), center_res)
    tilt_axis = v3.cross(z_axis, center_ca.pos)
    tilt_angle = v3.vec_dihedral(z_axis, tilt_axis, center_ca.pos)
    tilt = v3.rotation(tilt_axis, tilt_angle)
    scratch.transform(tilt)
    combined = v3.combine(tilt, combined)

    # 3. spin around the view axis so that the CA of top_res points
    #    towards the y-axis
    top_ca = find_ca_of_resname(scratch.atoms(), top_res)
    y_axis = v3.vector(0, 1, 0)
    spin_axis = z_axis.copy()
    spin_angle = v3.vec_dihedral(y_axis, spin_axis, top_ca.pos)
    spin = v3.rotation(spin_axis, spin_angle)

    return v3.combine(spin, combined)
def rescale_positive_negative_bfactors_pdb(
        pdb, lower_bfactor, upper_bfactor):
    """
    Clips the b-factors of pdb symmetrically and writes them to a new
    file.

    Values beyond +/-upper_bfactor are clamped to that limit (the
    largest b-factor is used when upper_bfactor is None), and values
    strictly between -lower_bfactor and lower_bfactor become 0.0.

    Returns (new_pdb, max_bfactor) where new_pdb is the filename of the
    rewritten structure and max_bfactor the largest rescaled value.
    """
    soup = pdbatoms.Soup(pdb)
    values = [atom.bfactor for atom in soup.atoms()]

    if upper_bfactor is None:
        upper_bfactor = max(values)

    # cut-off max_values
    if upper_bfactor:
        clamped = []
        for value in values:
            if value > upper_bfactor:
                value = upper_bfactor
            if value < -upper_bfactor:
                value = -upper_bfactor
            clamped.append(value)
        values = clamped

    # marker atoms carrying the two extreme values;
    # will delete later within pymol script
    for extreme in (upper_bfactor, -upper_bfactor):
        add_fake_water_atom(soup, 'XXX', extreme)

    # cut-off below min_val to zero
    if lower_bfactor:
        values = [
            0.0 if -lower_bfactor < value < lower_bfactor else value
            for value in values]

    # zip() stops at the end of values, so atoms appended after the
    # list was built keep the b-factor they were created with
    for atom, value in zip(soup.atoms(), values):
        atom.bfactor = value

    new_pdb = util.fname_variant(pdb)
    soup.write_pdb(new_pdb)
    return new_pdb, max(values)
def make_restraint_itp(restraint_pdb, force):
    """
    Returns the text of a restraint .itp: restraint_header followed by
    one line for every atom in restraint_pdb with a positive b-factor.

    Each line holds the 1-based position of the atom in the file and
    the force repeated three times.
    """
    pieces = [restraint_header]
    atoms = pdbatoms.Soup(restraint_pdb).atoms()
    for serial, atom in enumerate(atoms, 1):
        if atom.bfactor > 0.0:
            pieces.append(
                "%6s 1 %5.f %5.f %5.f\n" % (serial, force, force, force))
    return "".join(pieces)
def make_pdbs_png(
        png, pdbs, bgcolor="white", center_res=None, top_res=None,
        highlight_res=None, is_sticks=True, is_putty=False,
        width=480, height=480):
    """
    Renders the structures in pdbs to the image file png by building a
    PyMOL script and running it with run_pymol_script.

    Args:
      png: filename of the image saved by the script.
      pdbs: list of PDB filenames; the list itself is not modified.
      bgcolor: background color; any value containing 'transparent'
        turns the opaque background off instead.
      center_res, top_res: when both are given, every structure is
        first transformed with the transform computed from pdbs[0] by
        get_pdb_transform, and the transformed copies are rendered.
      highlight_res: optional residue passed to
        make_highlight_res_script.
      is_sticks: show (True) or hide (False) sticks.
      is_putty: use the putty representation instead of cartoon.
      width, height: passed on to run_pymol_script.

    Temporary files (transformed PDBs and the .pml script) are removed
    with util.clean_fname before returning.
    """
    if 'transparent' in bgcolor:
        script = 'set opaque_background, off\n'
    else:
        script = make_bgcolor_script(bgcolor)

    # Work on a private copy: the entries are replaced below by
    # temporary filenames that are deleted at the end, and the caller's
    # list must not be left pointing at deleted files.
    pdbs = list(pdbs)

    temp_fnames = []
    if center_res and top_res:
        transform = get_pdb_transform(pdbs[0], center_res, top_res)
        for i, pdb in enumerate(pdbs):
            soup = pdbatoms.Soup(pdb)
            soup.transform(transform)
            new_pdb = util.fname_variant(pdb)
            soup.write_pdb(new_pdb)
            temp_fnames.append(new_pdb)
            pdbs[i] = new_pdb
            del soup

    script += make_load_pdbs_script(pdbs)
    script += make_separate_chain_colors_script(pdbs)

    if is_putty:
        # NOTE(review): max_bfactor and upper_bfactor are not defined in
        # this function; unless they exist at module level this branch
        # raises NameError - confirm where they should come from.
        script += make_putty_script(
            get_scale_max(max_bfactor, upper_bfactor))
    else:
        script += cartoon_script

    if not is_sticks:
        script += "hide stick\n"
    else:
        script += "show stick\n"

    script += make_ligands_as_sticks_script(pdbs)
    if highlight_res:
        script += make_highlight_res_script(highlight_res)
    script += hide_backbone_sticks_script

    script += "save %s\n" % png
    script += "quit"

    # close the script file before PyMOL is asked to read it
    pml = util.fname_variant('temp.pml')
    with open(pml, 'w') as pml_file:
        pml_file.write(script)

    run_pymol_script(pml, width, height)

    temp_fnames.append(pml)
    util.clean_fname(*temp_fnames)
def transform_pdbs_to_residues_of_first_pdb(pdbs, center_res, top_res):
    """
    Applies to every file in pdbs the transform that get_pdb_transform
    computes from the first file with center_res and top_res.

    Each transformed structure is written to a new filename obtained
    from util.fname_variant; the list of new filenames is returned in
    the same order as pdbs.
    """
    transform = get_pdb_transform(pdbs[0], center_res, top_res)

    def write_transformed(pdb):
        out_pdb = util.fname_variant(pdb)
        soup = pdbatoms.Soup(pdb)
        soup.transform(transform)
        soup.write_pdb(out_pdb)
        return out_pdb

    return [write_transformed(pdb) for pdb in pdbs]
def make_chain_loading_script(pdb, basename):
    """
    Writes one PDB file per chain of pdb, named
    '<basename>.chain.<chain_id>.pdb', and returns the concatenation of
    protein_chain_script filled in for each chain.
    """
    soup = pdbatoms.Soup(pdb)
    fragments = []
    for raw_chain_id in soup.chain_ids():
        # a blank chain id is given the name 'A'
        # NOTE(review): the renamed id is also the one passed to
        # extract_chain - confirm that is what extract_chain expects.
        chain_id = 'A' if raw_chain_id == ' ' else raw_chain_id
        chain_pdb = '%s.chain.%s.pdb' % (basename, chain_id)
        soup.extract_chain(chain_id).write_pdb(chain_pdb)
        substitutions = {'chain_id': chain_id, 'chain_pdb': chain_pdb}
        fragments.append(protein_chain_script % substitutions)
    return "".join(fragments)
def rmsd_of_pdbs(
        pdb1, pdb2, segments1=None, segments2=None,
        atom_types=None, transform_pdb1=None):
    """
    Returns the RMSD between two PDB structures and optionally writes
    the best transformed structure of pdb1 in transform_pdb1.

    Both files are loaded into Soups and the work is delegated to
    rmsd_of_soups.

    Args:
      pdb1, pdb2 (str): filenames of the structures to compare.
      segments1 (list): list of pairs of residue names in pdb1, such as
        ['A:1','A:3'], interpreted as the two ends of a fragment in
        soup that we want the atom index of. Defaults to an empty list.
      segments2 (list): same as above but for pdb2.
      atom_types (list): list of atom_types in the residues that we
        want to generate the indices from. Defaults to ['CA'].
      transform_pdb1 (str): passed through to rmsd_of_soups.
    """
    # Fresh lists on every call: mutable default arguments would be
    # shared between calls and with whatever rmsd_of_soups does to them.
    if segments1 is None:
        segments1 = []
    if segments2 is None:
        segments2 = []
    if atom_types is None:
        atom_types = ['CA']

    soup1 = pdbatoms.Soup(pdb1)
    soup2 = pdbatoms.Soup(pdb2)
    return rmsd_of_soups(
        soup1, soup2, segments1, segments2, atom_types, transform_pdb1)
def calculate_periodic_box_script(parms):
    """
    Returns namd2 input fragment to parameterize the periodic box for
    the protein.

    The structure in parms['input_crds'] is loaded and, for each of the
    x, y and z axes, the box length (coordinate extent plus 0.5) and
    the origin (mean coordinate) are substituted into
    new_periodic_box_script.
    """
    template = new_periodic_box_script
    atoms = pdbatoms.Soup(parms['input_crds']).atoms()

    box = {}
    for i_axis, axis in enumerate('xyz'):
        coords = [atom.pos[i_axis] for atom in atoms]
        lowest, highest = min(coords), max(coords)
        box["len_" + axis] = highest - lowest + 0.5
        box[axis + "_origin"] = sum(coords) / float(len(coords))

    return template % box
def transformed_soup_from_pdb(
        pdb, center_res=None, top_res=None,
        width=None, height=None, frame_residues=None):
    """
    Loads pdb into a Soup and decorates it with optional view settings.

    - center_res and top_res (both required): the soup is transformed
      with the result of get_pdb_transform.
    - frame_residues: a PyMOL "zoom" command over these residues is
      stored in soup.frame_pymol_script.
    - width, height: stored on the soup when truthy.
    """
    soup = pdbatoms.Soup(pdb)

    if center_res and top_res:
        soup.transform(get_pdb_transform(pdb, center_res, top_res))

    if frame_residues:
        selection = ' or '.join(
            [pymol_id_from_res_tag(tag) for tag in frame_residues])
        soup.frame_pymol_script = "zoom (%s)\n" % selection

    if width:
        soup.width = width
    if height:
        soup.height = height

    return soup
def soup_from_psf(psf):
    """
    Returns a Soup from a .psf file.

    Lines are skipped until the one containing "NATOM", whose first
    word gives the number of atoms. Each following line is parsed as an
    atom record (number, chain/segment id, residue number, residue
    type, atom type, charge in word 6, mass in word 7) until that many
    atoms have been read. A new residue is started whenever the residue
    number changes.
    """
    soup = pdbatoms.Soup()
    curr_res_num = None
    is_header = True

    # the with-block guarantees the file is closed, including on the
    # early break and on parse errors
    with open(psf) as psf_file:
        for line in psf_file:
            if is_header:
                if "NATOM" in line:
                    n_atom = int(line.split()[0])
                    is_header = False
                continue

            words = line.split()
            atom_num = int(words[0])
            chain_id = words[1]
            res_num = int(words[2])
            res_type = words[3]
            atom_type = words[4]
            charge = float(words[6])
            mass = float(words[7])

            # ids starting with WT or ION are flagged as hetero atoms
            # with a blank chain; otherwise only the first character
            # of the id is kept as the chain id
            if chain_id.startswith(('WT', 'ION')):
                is_hetatm = True
                chain_id = " "
            else:
                is_hetatm = False
                chain_id = chain_id[0]

            if curr_res_num != res_num:
                res = pdbatoms.Residue(res_type, chain_id, res_num)
                soup.append_residue(res)
                curr_res_num = res_num

            atom = pdbatoms.Atom()
            atom.vel = v3.vector()
            atom.chain_id = chain_id
            atom.is_hetatm = is_hetatm
            atom.num = atom_num
            atom.res_num = res_num
            atom.res_type = res_type
            atom.type = atom_type
            atom.mass = mass
            atom.charge = charge
            atom.element = data.guess_element(res_type, atom_type)
            soup.insert_atom(-1, atom)

            # stop once every atom announced by NATOM has been read
            if len(soup.atoms()) == n_atom:
                break

    convert_to_pdb_atom_names(soup)
    return soup
def soup_from_top_gro(top, gro, skip_solvent=False):
    """
    Returns a Soup built from GROMACS restart files.

    Atoms are read from the .gro file (all lines except the first two
    and the last); masses and charges are then taken from the .top file
    via read_top. The last line of the .gro file is stored, as a list
    of floats, in soup.box.

    If skip_solvent=True, reading stops at the first atom whose residue
    type is "SOL"; the unread atom lines are kept in
    soup.remaining_text and their count in soup.n_remaining_text.
    """
    util.check_output(top)
    util.check_output(gro)

    soup = pdbatoms.Soup()
    soup.remaining_text = ""
    soup.n_remaining_text = 0

    # Read from .gro because .top does not contain water
    # residue information, which is "inferred"
    with open(gro, 'r') as gro_file:
        lines = gro_file.readlines()

    atoms = []
    for i_line, line in enumerate(lines[2:-1]):
        atom = AtomFromGroLine(line)
        if skip_solvent and atom.res_type == "SOL":
            # i_line counts from the third line of the file, hence +2
            remaining_lines = lines[i_line + 2:-1]
            soup.remaining_text = "".join(remaining_lines)
            soup.n_remaining_text = len(remaining_lines)
            break
        atoms.append(atom)
    soup.box = [float(w) for w in lines[-1].split()]

    # the chain id reported by read_top is not used here
    for atom, (mass, q, _chain_id) in zip(atoms, read_top(top)):
        atom.mass = mass
        atom.charge = q

    curr_res_num = -1
    for a in atoms:
        if curr_res_num != a.res_num:
            res = pdbatoms.Residue(a.res_type, a.chain_id, a.res_num)
            soup.append_residue(res.copy())
            curr_res_num = a.res_num
        soup.insert_atom(-1, a)

    convert_to_pdb_atom_names(soup)
    protein.find_chains(soup)

    return soup
def ligands_as_sticks_script(pdbs, color=""):
    """
    Returns a PyMOL script that shows as sticks every residue in pdbs
    that is neither listed in data.res_name_to_char nor a water (HOH).

    Args:
      pdbs: list of PDB filenames; the PyMOL object name used for each
        is its basename with '.pdb' removed.
      color: optional PyMOL color applied to the same residues.

    The script always ends with "show nonbonded".
    """
    script = ""
    for pdb in pdbs:
        name = os.path.basename(pdb).replace('.pdb', '')
        soup = pdbatoms.Soup(pdb)
        for res in soup.residues():
            if res.type in data.res_name_to_char:
                continue
            # Equality, not `in "HOH"`: the substring test also treated
            # residue types such as "O", "OH" or "" as water.
            if res.type == "HOH":
                continue
            chain_id_script = ""
            if res.chain_id.strip():
                chain_id_script = "and chain %s" % res.chain_id
            script += \
                "show stick, %s %s and resn %s and resi %d\n" \
                % (name, chain_id_script, res.type, res.num)
            if color:
                script += \
                    "color %s, %s %s and resn %s and resi %d\n" \
                    % (color, name, chain_id_script, res.type, res.num)
    script += "show nonbonded\n"
    return script
def __init__(self, trj, n_frame_per_ps, ref_pdb):
    """
    Sets up the analysis of trajectory trj.

    Args:
      trj: trajectory object; its soup and basename attributes are used.
      n_frame_per_ps: number of frames per picosecond.
      ref_pdb: filename of the reference structure; when falsy, a copy
        of the trajectory's soup is used as the reference instead.

    Two output files are opened for writing, named
    '<basename>.<var_name>.per_frame' and '<basename>.<var_name>.per_ps'.
    """
    self.trj = trj
    self.soup = self.trj.soup
    self.n_frame_per_ps = n_frame_per_ps

    # for reference frame
    self.ref_soup = pdbatoms.Soup(ref_pdb) if ref_pdb else self.soup.copy()

    # output files
    self.file_per_frame = open(
        trj.basename + '.' + self.var_name + '.per_frame', 'w')
    self.file_per_ps = open(
        trj.basename + '.' + self.var_name + '.per_ps', 'w')

    # cumul results for ps save
    self.cumul_results = None
    self.n_cumul_frame = 0
def rescale_positive_bfactors_pdb(pdb, lower_bfactor, upper_bfactor):
    """
    Clips the b-factors of pdb and writes them to a new file.

    Values above upper_bfactor are lowered to upper_bfactor and values
    below lower_bfactor are set to 0; either step is skipped when its
    limit is falsy.

    Returns (new_pdb, max_bfactor): the filename of the rewritten
    structure and the largest b-factor after rescale (needed for worm
    calculation).
    """
    soup = pdbatoms.Soup(pdb)
    bfactors = [a.bfactor for a in soup.atoms()]

    # cut-off max_values
    if upper_bfactor:
        bfactors = [
            upper_bfactor if b > upper_bfactor else b for b in bfactors]

    # marker atom carrying the upper limit;
    # will delete later within pymol script
    add_fake_water_atom(soup, 'XXX', upper_bfactor)

    # cut-off below min_val to zero; a single pass is enough (the
    # original repeated this whole-list pass once per atom)
    if lower_bfactor:
        bfactors = [0 if b < lower_bfactor else b for b in bfactors]

    # zip() stops at the end of bfactors, so the marker atom appended
    # above keeps the b-factor it was created with
    for a, bfactor in zip(soup.atoms(), bfactors):
        a.bfactor = bfactor

    new_pdb = util.fname_variant(pdb)
    soup.write_pdb(new_pdb)
    return new_pdb, max(bfactors)
def make_disulfide_script(pdb):
    """
    Returns the psfgen script for disulfide bonds.

    This function opens pdb in a soup object, and searches for pairs of
    CYS residues where the SG-SG distance < 3 angs. Each such pair is
    emitted as a "patch DISU" line. Residues are named
    "<chain_id>:<n>", where n counts residues from 1 within each run of
    residues sharing a chain id. The structure itself is not modified.

    Returns an empty string when no disulfide bond is found.
    """
    soup = pdbatoms.Soup(pdb)
    n = len(soup.residues())

    # First generate the residue names recognized by psfgen
    res_names = []
    chain_id = None
    i_res = None
    for i in range(n):
        res = soup.residue(i)
        if res.chain_id != chain_id:
            chain_id = res.chain_id
            i_res = 1
        res_names.append("%s:%s" % (chain_id, i_res))
        i_res += 1

    # Then search through for all CYS-CYS pairs and identify disulfide
    # bonds. Equality, not `in 'CYS'`: the substring test also matched
    # residue types such as "C", "YS" or "". Only cysteines are paired,
    # in the same (i < j) order as a scan over all residue pairs.
    cys_indices = [i for i in range(n) if soup.residue(i).type == 'CYS']
    script = ""
    for k, i in enumerate(cys_indices):
        sg1 = soup.residue(i).atom('SG')
        for j in cys_indices[k + 1:]:
            sg2 = soup.residue(j).atom('SG')
            if v3.distance(sg1.pos, sg2.pos) < 3.0:
                script += "patch DISU %s %s\n" % (res_names[i], res_names[j])

    if script:
        script = "# disulfide bonds\n" + script + "\n"
    return script
def disulfide_script_and_rename_cysteines(in_pdb, out_pdb):
    """
    Returns the tleap script for disulfide bonds in the in_pdb file.

    This function opens in_pdb in a Soup, and searches for CYS residues
    where the SG-SG distance < 3 angs. These residues are then renamed
    to CYX and written to out_pdb. The disulfide bonds are then
    returned in a .tleap script fragment, one "bond" line per pair,
    using 1-based residue positions.

    Because the residue type is re-read for every pair, a residue that
    has already been renamed to CYX is not bonded again.
    """
    soup = pdbatoms.Soup(in_pdb)
    script = " # disulfide bonds\n"
    n = len(soup.residues())
    for i in range(n):
        for j in range(i + 1, n):
            # Equality, not `in 'CYS'`: the substring test also matched
            # residue types such as "C", "YS" or "".
            if soup.residue(i).type != 'CYS':
                continue
            if soup.residue(j).type != 'CYS':
                continue
            p1 = soup.residue(i).atom('SG').pos
            p2 = soup.residue(j).atom('SG').pos
            if v3.distance(p1, p2) < 3.0:
                soup.residue(i).set_type('CYX')
                soup.residue(j).set_type('CYX')
                script += "bond pdb.%d.SG pdb.%d.SG\n" % (i + 1, j + 1)
    soup.write_pdb(out_pdb)
    util.check_output(out_pdb)
    return script
def soup_from_topology(topology):
    """
    Returns a Soup from a topology dictionary.

    Keys read: NRES, NATOM, RESIDUE_LABEL, RESIDUE_POINTER (1-based
    index of the first atom of each residue), ATOM_NAME, MASS, CHARGE
    and IFBOX. Residues labelled "WAT" are renamed "HOH", and charges
    are divided by sqrt_of_k.
    """
    soup = pdbatoms.Soup()
    chain_id = ''
    n_res = topology['NRES']
    n_atom = topology['NATOM']

    for i_res in range(n_res):
        res_num = i_res + 1

        res_type = topology['RESIDUE_LABEL'][i_res].strip()
        if res_type == "WAT":
            res_type = "HOH"
        soup.append_residue(pdbatoms.Residue(res_type, chain_id, res_num))
        # lookup kept from the original code; its result is not used
        res = soup.residue(i_res)

        # atoms of this residue run from its pointer up to the pointer
        # of the next residue, or to the last atom for the last residue
        first_atom = topology['RESIDUE_POINTER'][i_res] - 1
        if i_res == n_res - 1:
            atom_stop = n_atom
        else:
            atom_stop = topology['RESIDUE_POINTER'][i_res + 1] - 1

        for i_atom in range(first_atom, atom_stop):
            atom = pdbatoms.Atom()
            atom.vel = v3.vector()
            atom.num = i_atom + 1
            atom.res_num = res_num
            atom.res_type = res_type
            atom.type = topology['ATOM_NAME'][i_atom].strip()
            atom.mass = topology['MASS'][i_atom]
            atom.charge = topology['CHARGE'][i_atom] / sqrt_of_k
            atom.element = data.guess_element(atom.res_type, atom.type)
            soup.insert_atom(-1, atom)

    convert_to_pdb_atom_names(soup)

    if topology['IFBOX'] > 0:
        # create dummy dimension to ensure box dimension recognized
        soup.box_dimension_str = "1.000000 1.0000000 1.000000"

    return soup
def run(in_parms):
    """
    Run a AMBER simulations using the PDBREMIX in_parms dictionary.

    Keys read from in_parms: 'output_basename', 'topology',
    'input_crds', 'restraint_pdb', 'restraint_force' (only when
    'restraint_pdb' is truthy) and the presence of
    'n_step_minimization'. in_parms itself is not modified; a deep copy
    is used.

    All files are named from the output basename: copies of the input
    topology and coordinates, the sander input script, and the sander
    outputs. After sander has run, its output is checked for 'FATAL'
    and the restart files are checked to exist.
    """
    parms = copy.deepcopy(in_parms)
    basename = parms['output_basename']

    # Copies across topology file
    input_top = parms['topology']
    util.check_files(input_top)
    new_top = basename + '.top'
    shutil.copy(input_top, new_top)

    # Copies over coordinate/velocity files
    input_crd = parms['input_crds']
    util.check_files(input_crd)
    if input_crd.endswith('.crd'):
        new_crd = basename + '.in.crd'
    else:
        new_crd = basename + '.in.rst'
    shutil.copy(input_crd, new_crd)

    # Decide on type of output coordinate/velocity file
    if 'n_step_minimization' in parms:
        rst = basename + ".crd"
    else:
        rst = basename + ".rst"

    # Construct the long list of arguments for sander
    trj = basename + ".trj"
    vel_trj = basename + ".vel.trj"
    ene = basename + ".ene"
    inf = basename + ".inf"
    sander_out = basename + ".sander.out"
    sander_in = basename + ".sander.in"
    args = "-O -i %s -o %s -p %s -c %s -r %s -x %s -v %s -e %s -inf %s" \
        % (sander_in, sander_out, new_top, new_crd, rst, trj, vel_trj,
           ene, inf)

    # Make the input script
    script = make_sander_input_file(parms)

    # If positional restraints
    if parms['restraint_pdb']:
        # Generate the AMBER .crd file that stores the constrained
        # coordinates
        pdb = parms['restraint_pdb']
        soup = pdbatoms.Soup(pdb)
        ref_crd = basename + '.restraint.crd'
        write_soup_to_rst(soup, ref_crd)
        util.check_output(ref_crd)
        # Add the restraints .crd to the SANDER arguments
        args += " -ref %s" % ref_crd
        # Add the restraint forces and atom indices to the SANDER input
        # file
        script += make_restraint_script(pdb, parms['restraint_force'])

    # The file must be flushed and closed before sander reads it
    with open(sander_in, "w") as sander_in_file:
        sander_in_file.write(script)

    # Run the simulation
    data.binary('sander', args, basename)

    # Check if output is okay
    util.check_output(sander_out, ['FATAL'])
    top, crds, vels = get_restart_files(basename)
    util.check_output(top)
    util.check_output(crds)
def run_tleap(force_field, pdb, name, solvent_buffer=0.0, excess_charge=0):
    """
    Generates AMBER topology and coordinate files from PDB.

    Depending on whether excess_charge is non-zero, will also generate
    counterions (Cl- for a positive excess charge, Na+ for a negative
    one). Explicit waters and counterions are only generated when
    'GBSA' is not in force_field; no waters is used for implicit
    solvent simulations. Note that in the explicit-water case the
    solvent_buffer argument is overridden with 10.

    Args:
      force_field (str): must contain 'AMBER11' or 'AMBER8'.
      pdb (str): input structure; a cleaned copy is written to
        '<name>.clean.pdb' and used for tleap.
      name (str): basename of all generated files.

    Returns:
      (top, crd): filenames of the topology and coordinate files.

    Raises:
      Exception: if force_field names neither AMBER11 nor AMBER8.
    """
    util.check_output(pdb)

    # Remove all but protein heavy atoms in a single clean conformation
    tleap_pdb = name + '.clean.pdb'
    pdbtext.clean_pdb(pdb, tleap_pdb)

    # The restart files to be generated
    top = name + '.top'
    crd = name + '.crd'

    # Dictionary to substitute into tleap scripts
    params = {
        'top': top,
        'crd': crd,
        'pdb': tleap_pdb,
        'data_dir': data.data_dir,
        'solvent_buffer': solvent_buffer,
    }

    # use best force-field for the 2 versions of AMBER author has tested
    if 'AMBER11' in force_field:
        params['amber_ff'] = "leaprc.ff99SB"
    elif 'AMBER8' in force_field:
        params['amber_ff'] = "leaprc.ff96"
    else:
        raise Exception("Don't know which version of AMBER(8|11) to use.")

    # make the tleap input script
    script = force_field_script
    # check for a few non-standard residue that have been included
    residues = [r.type for r in pdbatoms.Soup(tleap_pdb).residues()]
    if 'PHD' in residues:
        with open("%s/phd.leaprc" % data.data_dir) as leaprc_file:
            script += leaprc_file.read()
    if 'ZNB' in residues:
        with open("%s/znb.leaprc" % data.data_dir) as leaprc_file:
            script += leaprc_file.read()
    script += "pdb = loadpdb %(pdb)s\n"
    # renames bonded cysteines in place in the cleaned pdb
    script += disulfide_script_and_rename_cysteines(tleap_pdb, tleap_pdb)
    if 'GBSA' not in force_field:
        # Add explicit waters as not GBSA implicit solvent
        if excess_charge != 0:
            # Add script to add counterions, must specify + or -
            if excess_charge > 0:
                script += "addions pdb Cl- 0\n"
            else:
                script += "addions pdb Na+ 0\n"
        solvent_buffer = 10
        params['solvent_buffer'] = solvent_buffer
        script += explicit_water_box_script
    script += save_and_quit_script
    script = script % params

    # Now write script to input file; it must be closed before tleap
    # reads it
    tleap_in = name + ".tleap.in"
    with open(tleap_in, "w") as tleap_in_file:
        tleap_in_file.write(script)

    # Now run tleap with tleap_in
    data.binary('tleap', "-f " + tleap_in, name + '.tleap')

    # Check output is okay
    if os.path.isfile('leap.log'):
        os.rename('leap.log', name + '.tleap.log')
    util.check_output(name + '.tleap.log', ['FATAL'])
    util.check_output(top)
    util.check_output(crd)

    return top, crd