def extract_ligand_records(self, pdb_file, relevant_ligands):
    """
    Extract all instances of fragment-containing small molecules bound to a PDB
    :param pdb_file: Path to the PDB file containing the fragment-containing ligand
    :param relevant_ligands: dict of Ideal_Ligand_PDB_Containers for ligands to find in pdb_file
    :return: relevant_ligands_prody_dict - structured [resname, chain, resnum] : ligand prody
    """
    pdb_header = prody.parsePDB(pdb_file, model=0, header=True)

    # Find resnums for all instances of fragment-containing small molecules bound to this PDB
    relevant_ligand_resnums = [(res.chain, res.resnum, res) for res in pdb_header['chemicals']
                               if res.resname in relevant_ligands.keys()]

    # Pull all relevant ligands from the PDB as prody AtomGroup objects
    pdb_prody_hv = prody.parsePDB(pdb_file, altloc=True).getHierView()
    relevant_ligands_prody_dict = dict()

    for ligand_chain, ligand_resnum, res in relevant_ligand_resnums:
        ligand_pdb_prody = pdb_prody_hv.getResidue(ligand_chain, ligand_resnum)

        # Issues occur when alternate coordinates are parsed for a ligand... we don't want those anyway
        if ligand_pdb_prody is None:
            continue

        relevant_ligands_prody_dict[(res.resname, ligand_chain, ligand_resnum)] = ligand_pdb_prody

    return relevant_ligands_prody_dict
def get_alphashape(pdb, chain=None, plot=False):
    '''
    Returns an AlphaShape object of a pdb file, outlining its general shape
    for use in a clash filter.
    '''
    if chain:
        atoms = prody.parsePDB(pdb, chain=chain)
    else:
        atoms = prody.parsePDB(pdb)
        atoms = atoms.select('not chain A')

    # For some reason there is a level which is not populated. This may
    # be for proteins w/ multiple chains.
    coordsets = atoms.getCoordsets()
    coords = []
    for coordset in coordsets:
        coords.extend(coordset)

    # coords = [(0., 0.), (0., 1.), (1., 1.), (1., 0.), (0.5, 0.5)]
    alpha_shape = alphashape.alphashape(coords, 0.18)

    if plot:
        helix = prody.parsePDB(pdb, chain='A')
        helixcoords = helix.getCoordsets()[0]
        fig = plt.figure()
        # ax = fig.add_subplot(projection='3d')
        ax = Axes3D(fig)
        ax.scatter(*zip(*coords))
        ax.scatter(*zip(*helixcoords))
        # # ax.add_patch(PolygonPatch(alpha_shape, alpha=0.2))
        ax.plot_trisurf(*zip(*alpha_shape.vertices), triangles=alpha_shape.faces, alpha=0.3)
        plt.show()

    return alpha_shape
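# A minimal usage sketch for get_alphashape above (hedged): it assumes the
# module-level imports the function relies on (prody, alphashape, matplotlib's
# pyplot/Axes3D) are available, and that "scaffold.pdb" is a hypothetical
# multi-chain structure whose non-A chains outline the volume used by the clash filter.
alpha_shape = get_alphashape("scaffold.pdb", plot=False)
print("alpha shape has", len(alpha_shape.vertices), "vertices and",
      len(alpha_shape.faces), "faces")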
def prepare_pdb22(self, out_prefix, csets=None, **kwargs): csets = self._make_csets(csets) nmin, psf = self._prepare_pdb22_one_frame(out_prefix, **kwargs) nmin_ag = prody.parsePDB(nmin) if len(csets) == 1: self.ag = nmin_ag self.save(nmin) return self, psf if nmin_ag.numAtoms() == self.ag.numAtoms(): if list(nmin_ag.getNames()) != list(self.ag.getNames()): nmin_ag = self._match_by_residue_position(self.ag, nmin_ag) else: nmin_ag.setCoords(self.ag.getCoordsets()) else: logger.info('Molecule was altered during preparation, preparing each frame separately') new_csets = [] for cset in csets: nmin_frame, psf_frame = self._prepare_pdb22_one_frame(out_prefix + '-%i-tmp' % cset, cset=0, **kwargs) ag_frame = prody.parsePDB(nmin_frame) assert (list(nmin_ag.getNames()) == list(ag_frame.getNames())) new_csets.append(ag_frame.getCoords()) nmin_frame.remove() psf_frame.remove() nmin_ag.setCoords(np.array(new_csets)) self.ag = nmin_ag self.save(nmin) return self, psf
def get_vdms(self, df, path_to_vdm=None): path = path_to_vdm or self._directory.split('csv')[0] + 'vdM/' with os.scandir(path) as it: for entry in it: if entry.name[0] != '.': filename_end = '_'.join(entry.name.split('_')[4:]) break if 'query_name' in df.columns: for n, row in df[['iFG_count', 'vdM_count', 'query_name']].iterrows(): try: yield pr.parsePDB(path + 'iFG_' + str(row['iFG_count']) + '_vdM_' + str(row['vdM_count']) + '_' + filename_end) except Exception: traceback.print_exc() else: for n, row in df[['iFG_count', 'vdM_count']].iterrows(): try: yield pr.parsePDB(path + 'iFG_' + str(row['iFG_count']) + '_vdM_' + str(row['vdM_count']) + '_' + filename_end) except Exception: traceback.print_exc()
def native_contact(rec_path, reorder_path, dock_path):
    parsed_docked = prody.parsePDB(dock_path).select('not hydrogen')
    parsed_crystal = prody.parsePDB(reorder_path).select('not hydrogen')
    parsed_rec = prody.parsePDB(rec_path).select('not hydrogen')

    cry_atom_num = parsed_crystal.numAtoms()
    lig_atom_num = parsed_docked.numAtoms()
    assert cry_atom_num == lig_atom_num

    docked_coords = parsed_docked.getCoordsets()
    crystal_coord = parsed_crystal.getCoords()
    rec_coord = parsed_rec.getCoords()

    exp_crystal_coord = np.expand_dims(crystal_coord, -2)
    cry_diff = exp_crystal_coord - rec_coord
    cry_distance = np.sqrt(np.sum(np.square(cry_diff), axis=-1))

    exp_docked_coords = np.expand_dims(docked_coords, -2)
    docked_diff = exp_docked_coords - rec_coord
    docked_distance = np.sqrt(np.sum(np.square(docked_diff), axis=-1))

    cry_contact = (cry_distance < distance_threshold).astype(int)
    num_contact = np.sum(cry_contact).astype(float)

    lig_contact = (docked_distance < distance_threshold).astype(int)
    contact_ratio = np.sum(cry_contact * lig_contact, axis=(-1, -2)) / num_contact

    return [list(contact_ratio)]
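# Hedged usage sketch for native_contact above: the function reads a module-level
# distance_threshold (in angstroms), so it must be defined before calling. The file
# names are hypothetical placeholders for a split receptor, the reordered crystal
# ligand, and a multi-pose docking result with matching atom order.
distance_threshold = 4.0  # assumption; use the cutoff defined by your pipeline
contact_ratios = native_contact("receptor.pdb", "ligand_reorder.pdb", "ligand_docked.pdb")
print(contact_ratios[0])  # one native-contact ratio per docked pose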
def __init__(self, pdb_file=None, pdb_list=None, ag=None):
    if pdb_file is not None:
        if isinstance(pdb_file, prody.Atomic):
            self.ag = pdb_file
        elif isinstance(pdb_file, str):
            self.ag = prody.parsePDB(pdb_file)
        else:
            raise ValueError('Wrong type of parameter `pdb_file` ({})'.format(type(pdb_file)))
    elif pdb_list is not None:
        ag_first = prody.parsePDB(pdb_list[0])
        new_csets = []
        for f in pdb_list:
            ag = prody.parsePDB(f)
            assert (list(ag.getNames()) == list(ag_first.getNames()))
            new_csets.append(ag.getCoords())
        ag_first.setCoords(np.array(new_csets))
        self.ag = ag_first
    elif ag is not None:
        self.ag = ag.copy()
    else:
        raise ValueError('No molecules specified')
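# Hedged sketch of the three construction modes accepted by the __init__ above,
# assuming it belongs to a molecule-wrapper class (called Mol here purely for
# illustration) and that the listed PDB files exist and share identical atom names.
mol_a = Mol(pdb_file="frame0.pdb")                  # parse a single structure
mol_b = Mol(pdb_list=["frame0.pdb", "frame1.pdb"])  # stack frames as coordinate sets
mol_c = Mol(ag=mol_a.ag)                            # copy an existing prody AtomGroup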
def overlap(reorder_path, dock_path):
    """
    calculate overlap for the docking result
    args:
        reorder_path:: str
            path of reorder ligand
        dock_path:: str
            path of docking result
    returns:
        overlap:: float
            overlap value
    """
    docked_coords = prody.parsePDB(dock_path).getCoordsets()
    crystal_coords = prody.parsePDB(reorder_path).getCoords()

    expanded_docked = np.expand_dims(docked_coords, -2)
    diff = expanded_docked - crystal_coords
    distance = np.sqrt(np.sum(np.power(diff, 2), axis=-1))

    all_clash = (distance < clash_cutoff_A).astype(float)
    atom_clash = (np.sum(all_clash, axis=-1) > 0).astype(float)
    position_clash_ratio = np.mean(atom_clash, axis=-1)

    return [list(position_clash_ratio)]
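# Hedged usage sketch for overlap above: it relies on a module-level clash_cutoff_A
# (in angstroms), so set it first. Paths are hypothetical placeholders.
clash_cutoff_A = 4.0  # assumption; tune to your pipeline's clash definition
clash_ratios = overlap("ligand_reorder.pdb", "ligand_docked.pdb")
print(clash_ratios[0])  # fraction of clashing atoms per docked pose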
def assign_pcs(args): fn, topf, eda, pcs, sel, outf = args if fn.endswith("pdb"): pdb = prody.parsePDB(fn) pdb = pdb.select(sel).copy() ensemble = prody.Ensemble('A single pdb file ensemble') ensemble.setCoords(pdb.getCoords()) ensemble.addCoordset(pdb.getCoordsets()) ensemble.iterpose() PCs = prody.calcProjection(ensemble, eda[pcs]) print(PCs) return elif fn.endswith(".dcd"): structure = prody.parsePDB(topf) str_sel = structure.select(sel) #dcd = prody.DCDFile(fn) dcd = prody.Trajectory(fn) dcd.link(structure) dcd.setCoords(structure) dcd.setAtoms(str_sel) PCs = prody.calcProjection(dcd, eda[pcs]) if outf is not None: header = " ".join(["PC%d" % (i + 1) for i in pcs]) np.savetxt(outf, PCs, fmt="%.4f", header=header, comments="") else: print("Unsupport file type: %s" % fn) return None return PCs
def createPCAMOdes(base_path, protein_list): for protein in protein_list: receptor = os.path.join(base_path, protein) + "/{}A-unbound.pdb".format(protein) ligand = os.path.join(base_path, protein) + "/{}B-unbound.pdb".format(protein) pca_rec_folder = "{}/{}/input/pca/concoord/receptor".format( base_path, protein) pca_lig_folder = "{}/{}/input/pca/concoord/ligand".format( base_path, protein) dist_rec = "{}/{}A-dist".format(pca_rec_folder, protein) dist_lig = "{}/{}B-dist".format(pca_lig_folder, protein) disco_rec = "{}/{}A-disco.pdb".format(pca_rec_folder, protein) disco_lig = "{}/{}B-disco.pdb".format(pca_lig_folder, protein) nmdfile_rec = "{}/{}A-nmd".format(pca_rec_folder, protein) nmdfile_lig = "{}/{}B-nmd".format(pca_lig_folder, protein) os.system("mkdir -p {}".format(pca_rec_folder)) os.system("mkdir -p {}".format(pca_lig_folder)) #pwd = os.getcwd() os.chdir(pca_rec_folder) p = Popen([ "/home/glenn/Documents/Masterarbeit/concoord/bin/dist", "-p", receptor ], stdin=PIPE) #, shell=True #,"-op",dist_rec p.communicate(input=b'1\n1\n') os.system( "/home/glenn/Documents/Masterarbeit/concoord/bin/disco -on {} -n 200 -i 1000 -viol 1. -bump " .format(disco_rec)) os.chdir(pca_lig_folder) p = Popen([ "/home/glenn/Documents/Masterarbeit/concoord/bin/dist", "-p", ligand ], stdin=PIPE) #, shell=True p.communicate(input=b'1\n1\n') os.system( "/home/glenn/Documents/Masterarbeit/concoord/bin/disco -on {} -n 200 -i 1000 -viol 1. -bump " .format(disco_lig)) try: pca_rec = calcPCA(disco_rec) atoms_rec = dy.parsePDB(receptor, subset='ca') dy.writeNMD(nmdfile_rec, pca_rec, atoms_rec) except: pass try: pca_lig = calcPCA(disco_lig) atoms_lig = dy.parsePDB(ligand, subset='ca') dy.writeNMD(nmdfile_lig, pca_lig, atoms_lig) except: pass
def rmsd(reorder_path, dock_path):
    docked_coords = prody.parsePDB(dock_path).getCoordsets()
    crystal_coords = prody.parsePDB(reorder_path).getCoords()
    rmsd = np.sqrt(np.mean(np.sum(np.square(docked_coords - crystal_coords), axis=1), axis=-1))
    return [list(rmsd)]
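# Hedged usage sketch for the rmsd helper above: both files must contain the same
# ligand with identical atom ordering; the docked file may hold several poses
# (coordinate sets). Paths are placeholders.
pose_values = rmsd("ligand_reorder.pdb", "ligand_docked.pdb")
print(pose_values[0])  # one value per docked pose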
def Resid(self, pdbFile, pdbChain1, pdbChain2, ligandName, runfolder): """ Return a list of residue in `Receptor` which distance from `Ligand` is less or equal 5 angstroms. """ # print "wthelelelelle" print self.runfolder os.chdir(runfolder) # acpype = glob('*.acpype') if pdbChain2 != '': a = prody.parsePDB( str(pdbFile)).select('(' + pdbChain1 + ')' + ' and within 5 of chain ' + pdbChain2) residList = np.array(list(sorted(set(a.getResnums()))), dtype='str') residlist = " ".join(residList) # for i in range(1, len(residList)): # residlist += ' ' + str(residList[i]) with open('cutoff-resid-5angstroms', 'w') as residfile: residfile.write(residlist) return residList, str(runfolder + '/cutoff-resid-5angstroms') elif ligandName != '': receptor = prody.parsePDB(str(pdbFile)) Ligand = [] # for i in range(len(acpype)): # Ligand.append(str(acpype[i].strip('.acpype'))) # print "is it a bug here??" # print Ligand # ligand = [] # for i in range(len(Ligand)): Ligand.append( prody.parsePDB( str(ligandName) + '.acpype/' + str(ligandName) + '_NEW.pdb')) # print ligand protein = receptor # print "this is protein before add ligand[i]" # print protein haha = [] for i in range(len(Ligand)): protein += Ligand[i] haha = np.array(list(sorted(set(Ligand[i].getResnames()))), dtype='str') # print "this is protein after add ligand[i]" # print protein # print haha # print type(haha) ligands = ' or resname '.join(haha) hoho = protein.select('(' + pdbChain1 + ')' + ' and within 5 of resname ' + ligands) residList = list(sorted(set(hoho.getResnums()))) residList = np.array(residList, dtype='str') residlist = " ".join(residList) # print residlist # for i in range(1, len(residList)): # residlist += ' ' + str(residList[i]) with open('cutoff-resid-5angstroms', 'w') as residfile: residfile.write(residlist) return residList, str(runfolder + '/cutoff-resid-5angstroms')
def write_superposed_pdbs(self, output_pdb_folder, alignments: dict = None): """ Superposes PDBs according to alignment and writes transformed PDBs to files (View with Pymol) Parameters ---------- alignments output_pdb_folder """ if alignments is None: alignments = self.alignment output_pdb_folder = Path(output_pdb_folder) if not output_pdb_folder.exists(): output_pdb_folder.mkdir() reference_name = self.structures[0].name reference_pdb = pd.parsePDB( str(self.output_folder / f"cleaned_pdb/{self.structures[0].name}.pdb") ) core_indices = np.array( [ i for i in range(len(alignments[reference_name])) if -1 not in [alignments[n][i] for n in alignments] ] ) aln_ref = alignments[reference_name] ref_coords_core = ( reference_pdb[helper.get_alpha_indices(reference_pdb)] .getCoords() .astype(np.float64)[np.array([aln_ref[c] for c in core_indices])] ) ref_centroid = helper.nb_mean_axis_0(ref_coords_core) ref_coords_core -= ref_centroid transformation = pd.Transformation(np.eye(3), -ref_centroid) reference_pdb = pd.applyTransformation(transformation, reference_pdb) pd.writePDB(str(output_pdb_folder / f"{reference_name}.pdb"), reference_pdb) for i in range(1, len(self.structures)): name = self.structures[i].name pdb = pd.parsePDB( str(self.output_folder / f"cleaned_pdb/{self.structures[i].name}.pdb") ) aln_name = alignments[name] common_coords_2 = ( pdb[helper.get_alpha_indices(pdb)] .getCoords() .astype(np.float64)[np.array([aln_name[c] for c in core_indices])] ) ( rotation_matrix, translation_matrix, ) = superposition_functions.svd_superimpose( ref_coords_core, common_coords_2 ) transformation = pd.Transformation(rotation_matrix.T, translation_matrix) pdb = pd.applyTransformation(transformation, pdb) pd.writePDB(str(output_pdb_folder / f"{name}.pdb"), pdb)
def paste_loop(path_to_loop, path_to_pdb, query_selection_N, query_selection_C,
               query_length_N=4, query_length_C=4, include_sidechains=False):
    loop = pr.parsePDB(path_to_loop)
    loop.setSegnames('A')
    loop_bb = loop.select('backbone')

    pdb = pr.parsePDB(path_to_pdb)
    query_N = pdb.select(query_selection_N)
    query_N_bb = query_N.select('name N C CA')
    query_C = pdb.select(query_selection_C)
    query_C_bb = query_C.select('name N C CA')

    first_resnum_loop = loop_bb.getResnums()[0]
    last_resnum_loop = loop_bb.getResnums()[-1]
    loop_N_bb = loop_bb.select('name N C CA and resnum `' + str(first_resnum_loop) + 'to'
                               + str(first_resnum_loop + query_length_N - 1) + '`')
    loop_C_bb = loop_bb.select('name N C CA and resnum `' + str(last_resnum_loop - query_length_C + 1)
                               + 'to' + str(last_resnum_loop) + '`')

    try:
        coords_diff_N = loop_N_bb.getCoords() - query_N_bb.getCoords()
        coords_diff_C = loop_C_bb.getCoords() - query_C_bb.getCoords()
    except ValueError:
        # Backbone selections of loop and query do not line up; bail out.
        print('Loop failure')
        return None, None, None

    ind_match_N = np.argmin([np.linalg.norm(i) for i in coords_diff_N])
    ind_match_C = np.argmin([np.linalg.norm(i) for i in coords_diff_C])
    loop_N_bb_index = loop_N_bb.getIndices()[ind_match_N]
    loop_C_bb_index = loop_C_bb.getIndices()[ind_match_C]
    query_N_bb_index = query_N_bb.getIndices()[ind_match_N]
    query_C_bb_index = query_C_bb.getIndices()[ind_match_C]

    first_index_pdb = pdb.select('backbone').getIndices()[0]
    last_index_pdb = pdb.select('backbone').getIndices()[-1]
    loop_slice = loop_bb.select('index ' + str(loop_N_bb_index) + 'to' + str(loop_C_bb_index))

    if not include_sidechains:
        pdb_N = pdb.select('backbone and index ' + str(first_index_pdb) + 'to' + str(query_N_bb_index - 1))
        pdb_C = pdb.select('backbone and index ' + str(query_C_bb_index + 1) + 'to' + str(last_index_pdb))
    else:
        pdb_N = pdb.select('index ' + str(first_index_pdb) + 'to' + str(query_N_bb_index - 1))
        pdb_C = pdb.select('index ' + str(query_C_bb_index + 1) + 'to' + str(last_index_pdb))

    return pdb_N, loop_slice, pdb_C
def _import_pdbs(self): """ For each fragment ensemble, converts each residue in all processed PDBs into objects with representative matrices and other relevant information :return: """ processsed_residue_list = [] for pdb in pdb_check(self.processed_PDBs_dir): # Make sure I can load things... try: prody_protein = prody.parsePDB(pdb) # Check that residues exist within cutoff distance provided in alignments, otherwise pass prody_protein_selection = prody_protein.select( 'protein and not hetatm') if prody_protein_selection == None: continue else: prody_protein_hv = prody_protein_selection.getHierView() except Exception as e: print(e) continue pdb_info = os.path.basename(os.path.normpath(pdb)) prody_ligand = prody.parsePDB(pdb).select( 'hetatm and resname {}'.format(pdb_info.split('_')[1])) # todo: CATCH THIS!!! if prody_ligand is None: continue # Iterate over residues in contacts and generate representative vector with weights applied processsed_residue_list += [ fragment_PDB( residue, pdb_info, prody_ligand, ) for residue in prody_protein_hv.iterResidues() ] processsed_residue_list_cleaned = [ residue for residue in processsed_residue_list if residue.viable is not None ] print( f'Unique processed and viable residues: {len(processsed_residue_list_cleaned)}' ) return processsed_residue_list_cleaned
def get_chain_from_astral_id(astral_id, d): """Given an ASTRAL ID and the ASTRAL->PDB/chain mapping dictionary, this function attempts to return the relevant, parsed ProDy object.""" pdbid, chain = d[astral_id] assert "," not in chain, f"Issue parsing {astral_id} with chain {chain} and pdbid " \ f"{pdbid}." chain, resnums = chain.split(":") if astral_id == "d4qrye_" or astral_id in ASTRAL_IDS_INCORRECTLY_PARSED: chain = "A" resnums = "" # Handle special case https://github.com/prody/ProDy/issues/1197 if astral_id == "d1tocr1": # a = pr.performDSSP("1toc") a = pr.parsePDB("1toc", chain="R") a = a.select("(chain R) and (resnum 2 to 59 or resnum 1A)" ) # Note there is no 1B return a a = pr.parsePDB(pdbid, chain=chain) if resnums != "": # This pattern matches ASTRAL number ranges like 1-100, 1A-100, -1-39, -4--1, etc. p = re.compile( r"((?P<d1>-?\d+)(?P<ic1>\w?))-((?P<d2>-?\d+)(?P<ic2>\w?))") match = p.match(resnums) start, start_icode = int(match.group("d1")), match.group("ic1") end, end_icode = int(match.group("d2")), match.group("ic2") # Ranges with negative numbers must be escaped with ` character range_str = f"{start} to {end}" if start < 0 or end < 0: range_str = f"`{range_str}`" if not start_icode and not end_icode: # There are no insertion codes. Easy case. selection_str = f"resnum {range_str}" elif (start_icode and not end_icode) or (not start_icode and end_icode): # If there's only one insertion code, this selection is not well defined # and must be handled by special cases above. raise ValueError(f"Unsupported ASTRAL range {astral_id}.") elif start_icode and end_icode: if start_icode == end_icode: selection_str = f"resnum {range_str} and icode {start_icode}" else: raise ValueError(f"Unsupported ASTRAL range {astral_id}.") a = a.select(selection_str) return a
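# A small, self-contained illustration (not part of the original code) of the ASTRAL
# residue-range regex used above, showing how residue numbers, optional insertion
# codes, and negative residue numbers are captured.
import re

p = re.compile(r"((?P<d1>-?\d+)(?P<ic1>\w?))-((?P<d2>-?\d+)(?P<ic2>\w?))")
m = p.match("1A-100")
print(m.group("d1"), m.group("ic1"), m.group("d2"), m.group("ic2"))  # 1 A 100 (empty icode)
m = p.match("-4--1")
print(m.group("d1"), m.group("d2"))  # -4 -1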
def fix_openmm(): # get the whole crystal structure # get only the ATOM records # and HETAM records for MSE # convert MSE to MET with open('no_smet.pdb', 'w') as outfile: with open('experimental.pdb') as infile: for line in infile: if line.startswith('ATOM'): outfile.write(line) if line.startswith('HETATM'): if line[17:20] == 'MSE': atom_name = line[12:17] if atom_name == 'SE ': atom_name = ' SD ' line_fixed = 'ATOM ' + line[ 6:12] + atom_name + 'MET' + line[20:67] + '\n' outfile.write(line_fixed) # load the file into prody p = prody.parsePDB('no_smet.pdb') p = p.select('not hydrogen') # get one of the rosetta models r = prody.parsePDB('rosetta.pdb') # perform an alignment to find out what part of the crystal structure # corresponds to the rosetta file match = prody.matchChains(r, p, subset='all', overlap=25, pwalign=True)[0][1] print len(match) prody.writePDB('chain.pdb', match) # now clean it up with pdb fixer subprocess.check_call('python ~/Source/PdbFixer/pdbfixer.py chain.pdb', shell=True) # now load it with zam p = protein.Protein('output.pdb') p.Dehydrogen() disulfide_pairs = find_disulfide(p) for r1, r2 in disulfide_pairs: print ' added disulfide between {} and {}'.format(r1, r2) p.Res[r1].FullName = 'CYX' p.Res[r2].FullName = 'CYX' p.WritePdb('start.pdb') # now run tleap print ' running tleap' run_tleap(disulfide_pairs)
def prody_contacts(**kwargs): """Identify contacts of a target structure with one or more ligands. Contacting atoms (or extended subset of atoms, such as residues) are outputted in PDB file format. :arg target: target PDB identifier or filename :arg ligand: ligand PDB identifier(s) or filename(s) :arg select: atom selection string for target structure :arg radius: contact radius (Å), default is ``4.0`` :arg extend: output same ``'residue'``, ``'chain'``, or ``'segment'`` along with contacting atoms :arg prefix: prefix for output file, default is *target* filename :arg suffix: output filename suffix, default is *ligand* filename""" import prody LOGGER = prody.LOGGER target = prody.parsePDB(kwargs['target']) title = kwargs.get('prefix') or target.getTitle() selstr = kwargs.get('select') if selstr: target = target.select(selstr) contacts = prody.Contacts(target) suffix = kwargs.get('suffix', '_contacts') extend = kwargs.get('extend') radius = float(kwargs.get('radius', 4.0)) ligands = kwargs.get('ligand') if len(ligands) > 1: outfn = lambda fn: title + suffix + '_' + fn + '.pdb' else: outfn = lambda fn: title + suffix + '.pdb' for pdb in ligands: ligand = prody.parsePDB(pdb) sel = contacts(radius, ligand) if sel: LOGGER.info('{0} atoms from {1} contact {2}.'.format( len(sel), pdb, str(target))) if extend: sel = target.select('same ' + extend + ' as sel', sel=sel) LOGGER.info('Selection is extended to {0} atoms of the same ' '{1}(s).'.format(len(sel), extend)) pdbfn = outfn(ligand.getTitle()) LOGGER.info('Writing contacts into ' + pdbfn) prody.writePDB(pdbfn, sel)
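# Hedged usage sketch for prody_contacts above: target and ligand entries may be PDB
# identifiers or file names; the values below are hypothetical. Output PDB files are
# named with the prefix/suffix rules described in the docstring.
prody_contacts(target="receptor.pdb",
               ligand=["ligand1.pdb", "ligand2.pdb"],
               radius=4.0,
               extend="residue")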
def rmsd(bucket, table_idx, param, input_data):
    '''
    Calculate rmsd and insert the result into the database
    Args:
        table_idx: int, id for the rmsd table
        param: dict, parameters
            {
                'input_docked_folder': '...',
                'input_crystal_folder': '...',
            }
        input_data: list [receptor, chain, resnum, resname]

    Returns:

    '''
    try:
        receptor, chain, resnum, resname = input_data
        input_docked_folder = param['input_docked_folder']
        input_crystal_folder = param['input_crystal_folder']

        lig_name = '_'.join([receptor, chain, resnum, resname, 'ligand']) + '.pdb'
        input_docked_dir = os.path.join(data_dir, input_docked_folder, receptor)
        input_docked_path = os.path.join(input_docked_dir, lig_name)
        input_crystal_dir = os.path.join(data_dir, input_crystal_folder, receptor)
        input_crystal_path = os.path.join(input_crystal_dir, lig_name)

        docked_coords = prody.parsePDB(input_docked_path).getCoordsets()
        crystal_coord = prody.parsePDB(input_crystal_path).getCoords()
        rmsd = np.sqrt(np.mean(np.sum(np.square(docked_coords - crystal_coord), axis=1),
                               axis=-1))  # todo (maksym) RMSDs not rmsd

        records = []
        for i, rd in enumerate(rmsd):
            records.append(input_data + [i + 1, rd, 1, 'success'])
        db.insert(table_idx, records, bucket=bucket)

    except Exception as e:
        record = input_data + [1, 0, 0, str(e)]
        records = [record]
        db.insert(table_idx, records, bucket=bucket)
def rmsd(reorder_outpath, dock_outpath, init='rmsd_init'):
    init = eval(init)
    reorder_path = os.path.join(init.data_dir, reorder_outpath)
    dock_path = os.path.join(init.data_dir, dock_outpath)
    docked_coords = prody.parsePDB(dock_path).getCoordsets()
    crystal_coords = prody.parsePDB(reorder_path).getCoords()
    rmsd = np.sqrt(np.mean(np.sum(np.square(docked_coords - crystal_coords), axis=1), axis=-1))
    return [list(rmsd)]
def overlap(reorder_path, dock_path): docked_coords = prody.parsePDB(dock_path).getCoordsets() crystal_coords = prody.parsePDB(reorder_path).getCoords() expanded_docked = np.expand_dims(docked_coords, -2) diff = expanded_docked - crystal_coords distance = np.sqrt(np.sum(np.power(diff, 2), axis=-1)) all_clash = (distance < clash_cutoff_A).astype(float) atom_clash = (np.sum(all_clash, axis=-1) > 0).astype(float) position_clash_ratio = np.mean(atom_clash, axis=-1) return [list(position_clash_ratio)]
def native_contact(rec_path, reorder_path, dock_path): """ calculate native contact ratio for the docking result args: rec_path:: str path of splited receptor reorder_path:: str path of reorder ligand dock_path:: str path of docking result returns: native_contact:: float native contact value """ parsed_docked = prody.parsePDB(dock_path).select('not hydrogen') parsed_crystal = prody.parsePDB(reorder_path).select('not hydrogen') parsed_rec = prody.parsePDB(rec_path).select('not hydrogen') cry_atom_num = parsed_crystal.numAtoms() lig_atom_num = parsed_docked.numAtoms() assert cry_atom_num == lig_atom_num docked_coords = parsed_docked.getCoordsets() crystal_coord = parsed_crystal.getCoords() rec_coord = parsed_rec.getCoords() exp_crystal_coord = np.expand_dims(crystal_coord, -2) cry_diff = exp_crystal_coord - rec_coord cry_distance = np.sqrt(np.sum(np.square(cry_diff), axis=-1)) exp_docked_coords = np.expand_dims(docked_coords, -2) docked_diff = exp_docked_coords - rec_coord docked_distance = np.sqrt(np.sum(np.square(docked_diff),axis=-1)) cry_contact = (cry_distance < distance_threshold).astype(int) num_contact = np.sum(cry_contact).astype(float) lig_contact = (docked_distance < distance_threshold).astype(int) contact_ratio = np.sum(cry_contact * lig_contact, axis=(-1,-2)) / num_contact return [list(contact_ratio)]
def clean_pair(bound_pdb, bound_chains, peptide_chains, unbound_pdb, unbound_chains): # 2 bound_receptor = parsePDB(bound_pdb, chain=bound_chains+peptide_chains) writePDB('b.pdb',bound_receptor.select('protein and chain %s' % ' '.join(list(bound_chains)))) writePDB('p.pdb',bound_receptor.select('protein and chain %s' % peptide_chains)) #3 unbound_receptor = parsePDB(unbound_pdb, chain=unbound_chains) alignment_results = compare.matchAlign(unbound_receptor, bound_receptor) unbound_receptor = alignment_results[0] writePDB('unb.pdb',unbound_receptor.select('protein')) writePDB('b.pdb',bound_receptor.select('protein and chain %s' % ' '.join(list(bound_chains)))) writePDB('p.pdb',bound_receptor.select('protein and chain %s' % peptide_chains)) writePDB('up.pdb',unbound_receptor.select('protein') | bound_receptor.select('protein and chain %s' % peptide_chains)) return 0
def align_by_resid(input_pdb_path, target_pdb_path): input_mol = parsePDB(input_pdb_path) target_mol = parsePDB(target_pdb_path) target_resid, input_resid = target_mol.select( 'calpha').getResnums(), input_mol.select('calpha').getResnums() target_index = np.where(np.in1d(input_resid, target_resid))[0] native_index = np.where(np.in1d(target_resid, input_resid))[0] if len(input_mol.select('name CA').getSequence()) < 25 or len( target_mol.select('name CA').getSequence()) < 25 or len( target_index) < 25: return None, None, None input_mol = input_mol.select( 'resindex ' + reduce(lambda a, b: str(a) + ' ' + str(b), target_index)) return input_mol, target_index, native_index
def compare_pdb_files(file1, file2):
    """Returns the RMSD between two PDB files of the same protein.

    Args:
        file1 (str): Path to first PDB file.
        file2 (str): Path to second PDB file. Must be the same protein as in file1.

    Returns:
        float: Root Mean Squared Deviation (RMSD) between the two structures.
    """
    s1 = pr.parsePDB(file1)
    s2 = pr.parsePDB(file2)
    transformation = pr.calcTransformation(s1, s2)
    s1_aligned = transformation.apply(s1)
    return pr.calcRMSD(s1_aligned, s2)
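# Hedged usage sketch for compare_pdb_files above, with hypothetical file names; both
# files must describe the same protein with matching atom counts for the
# superposition and RMSD to be meaningful.
rmsd_value = compare_pdb_files("model_a.pdb", "model_b.pdb")
print("RMSD after superposition: %.2f" % rmsd_value)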
def prody_align(opt): """Align models in a PDB file or a PDB file onto others.""" import prody LOGGER = prody.LOGGER args = opt.pdb if len(args) == 1: pdb = args[0] LOGGER.info('Aligning multiple models in: ' + pdb) selstr, prefix, model = opt.select, opt.prefix, opt.model pdb = prody.parsePDB(pdb) pdbselect = pdb.select(selstr) if pdbselect is None: opt.subparser.error('Selection {0:s} do not match any atoms.' .format(repr(selstr))) LOGGER.info('{0:d} atoms will be used for alignment.' .format(len(pdbselect))) pdbselect.setACSIndex(model-1) prody.printRMSD(pdbselect, msg='Before alignment ') prody.alignCoordsets(pdbselect) prody.printRMSD(pdbselect, msg='After alignment ') if prefix == '': prefix = pdb.getTitle() + '_aligned' outfn = prefix + '.pdb' LOGGER.info('Writing file: ' + outfn) prody.writePDB(outfn, pdb) else: reffn = args.pop(0) seqid=opt.seqid overlap=opt.overlap LOGGER.info('Aligning structures onto: ' + reffn) ref = prody.parsePDB(reffn) for arg in args: if arg == reffn: continue if '_aligned.pdb' in arg: continue pdb = prody.parsePDB(arg) result = prody.matchAlign(pdb, ref, seqid=seqid, overlap=overlap, tarsel=opt.select, allcsets=True, cslabel='Model', csincr=1) if result: outfn = pdb.getTitle() + '_aligned.pdb' LOGGER.info('Writing file: ' + outfn) prody.writePDB(outfn, pdb) else: LOGGER.warning('Failed to align ' + arg)
def calc_pocket_rmsd(rec, lig, root): """ Calculate difference between the ligand reference receptor and the receptor it is being docked into. From original script by David Koes """ ligrec = lig.replace("LIG_aligned.sdf", "PRO.pdb") rec = prody.parsePDB(os.path.join(root, rec)) ligrec = prody.parsePDB(os.path.join(root, ligrec)) lig = next(pybel.readfile("sdf", os.path.join(root, lig))) c = np.array([a.coords for a in lig.atoms]) nearby = rec.select("protein and same residue as within 3.5 of point", point=c) matches = [] for cutoff in range(90, 0, -10): # can't just set a low cutoff since we'll end up with bad alignments # try a whole bunch of alignments to maximize the likelihood we get the right one m = prody.matchChains(rec, ligrec, subset="all", overlap=cutoff, seqid=cutoff, pwalign=True) if m: matches += m minrmsd = np.inf minbackrmsd = np.inf for rmap, lrmap, _, _ in matches: try: closeatoms = set(nearby.getIndices()) lratoms = [] ratoms = [] for i, idx in enumerate(rmap.getIndices()): if idx in closeatoms: lratoms.append(lrmap.getIndices()[i]) ratoms.append(idx) if len(lratoms) == 0: continue rmsd = prody.calcRMSD(rec[ratoms], ligrec[lratoms]) backrmsd = prody.calcRMSD(rec[ratoms] & rec.ca, ligrec[lratoms] & ligrec.ca) if rmsd < minrmsd: minrmsd = rmsd minbackrmsd = backrmsd except: pass return minrmsd, minbackrmsd
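# Hedged usage sketch for calc_pocket_rmsd above, assuming the directory layout the
# function implies: a receptor PDB and an aligned ligand SDF whose file name ends in
# "LIG_aligned.sdf", both given relative to a common root. Paths are placeholders.
pocket_rmsd, pocket_backbone_rmsd = calc_pocket_rmsd(
    "target/PRO.pdb", "reference/LIG_aligned.sdf", "/data/crossdock")
print(pocket_rmsd, pocket_backbone_rmsd)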
def reorder(bucket, table_idx, param, input_data): # todo(maksym) smina_reorder try: receptor, chain, resnum, resname = input_data output_folder = param['output_folder'] output_folder = '{}_{}'.format(table_idx, output_folder) input_lig_folder = param['input_ligand_folder'] input_rec_folder = param['input_receptor_folder'] smina_pm = smina_param() smina_pm.param_load(param['smina_param']) out_dir = os.path.join(data_dir, output_folder, receptor) _makedir(out_dir) out_name = '_'.join(input_data + ['ligand']) + '.pdb' out_path = os.path.join(out_dir, out_name) input_lig_dir = os.path.join(data_dir, input_lig_folder, receptor) # lig_dir = input_lig_dir lig_name = '_'.join(input_data + ['ligand']) + '.pdb' input_lig_path = os.path.join(input_lig_dir, lig_name) input_rec_dir = os.path.join(data_dir, input_rec_folder, receptor) # rec_dir = input_rec_dir rec_name = '_'.join(input_data + ['receptor']) + '.pdb' input_rec_path = os.path.join(input_rec_dir, rec_name) kw = { 'receptor': input_rec_path, 'ligand': input_lig_path, 'autobox_ligand': input_lig_path, 'out': out_path } cmd = smina_pm.make_command(**kw) # todo(maksym) smina_cmd cl = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) cl.wait() prody.parsePDB(out_path) record = input_data + [1, 'success'] records = [record] db.insert(table_idx, records, bucket=bucket) except Exception as e: record = input_data + [0, str(e)] records = [record] db.insert(table_idx, records, bucket=bucket)
def preprocess_single(model, chain_name): hv = model.getHierView() for chain in hv.iterChains(): chain.setChids(chain_name) prody.writePDB("tmp.pdb", model) model = prody.parsePDB("tmp.pdb") check_call(["rm", "tmp.pdb"]) pos = 1 apos = 1 hv = model.getHierView() for chain in hv.iterChains(): # print "chain : " , chain for res in chain.iterResidues(): res.setResnums(pos) for atom in res: atom.setSerial(apos) apos += 1 if len(res.getIcode()) == 0: pos += 1 hv.update() return model
def __init__(self, pdbid, cfg, ): self.pdbid = pdbid self.config = cfg self.svm = config.trainClassifier(self.config) fn_pattern = BOUND_FILENAME_PATTERN if self.config.testing.is_bound \ else UNBOUND_FILENAME_PATTERN self.pdb_filename = fn_pattern.format(pdb=self.pdbid) self.receptor = prody.parsePDB(self.pdb_filename).protein.noh self.ddgs = self.config.testing.label_data_df.ix[self.pdbid] self.confidence = pd.Series( data=config.predictClassifier(self.config), index=self.config.testing.label_data_df.index, ).ix[self.pdbid] self.mask_binding = self.ddgs > self.config.testing.ddg_cutoff self.mask_positive = self.confidence > 0 self.surface_resnums = self.ddgs.index self.binding_resnums = \ self.surface_resnums[self.mask_binding] self.positive_resnums = \ self.surface_resnums[self.mask_positive] def receptor_residues(resnums): return self.receptor.select( 'resnum %s' % ' '.join(map(str, resnums))) self.surface_residues = receptor_residues(self.surface_resnums) self.positive_residues = receptor_residues(self.positive_resnums)
def phi2pdb(base_pdb, phi, save_path="./"): pdb = prody.parsePDB(base_pdb) atoms = [a for a in pdb] for a in pdb: a.setBeta(0.0) phif = open(phi, "r") phic = phif.readlines() phif.close() for l in phic: x,y,z,k = l.strip().split(",") x = np.float(x) y = np.float(y) z = np.float(z) k = np.float(k) for a in xrange(len(atoms)): X,Y,Z = atoms[a].getCoords() if X==x and Y==y and Z==z: atoms[a].setBeta(k) atoms.pop(a) break out_pdb = os.path.join(save_path, phi.split(os.sep)[-1][:-4]+".pdb") prody.writePDB(out_pdb, pdb) return out_pdb
def getPairInformation(pdbid, reference_chain, pair_chain, cutoff=5, covalent_bond_cutoff=5): """ 1. reads pdb id from file 2. selects atoms from pair of chains within cutoff # draws selection within interface (simplest possible view) reference == oncogene, pair == peptide """ atoms = prody.parsePDB(pdbid) # TODO: turn off debug reference_atoms = atoms.select("chain %s and not water" % reference_chain) pair_atoms = atoms.select("chain %s and not water" % pair_chain) # next try to select everything ref_contacts = prody.measure.contacts.Contacts(reference_atoms) ref_selection = ref_contacts.select(cutoff, pair_atoms) # we need these atoms pair_contacts = prody.measure.contacts.Contacts(pair_atoms) pair_selection = pair_contacts.select(cutoff, reference_atoms) # and these sulfur_pairs = [] ## 1. select Cys atoms on oncogene for (r, ch2, distance) in prody.measure.contacts.findNeighbors( reference_atoms, covalent_bond_cutoff, pair_atoms): if r.getResname() in 'CYS': # and r.getElement() in ['S'] : sulfur_pairs.append((r.getSerial(), ch2.getSerial())) # filtering: if there is no Cys, return nothing if len(sulfur_pairs) < 1: return None return (pdbid, reference_chain, set(ref_selection.getResnums()), pair_chain, set(pair_selection.getResnums()), sulfur_pairs)
def __init__(self, pdbid, context='bound'): self.pdbid = pdbid.upper() self.context = context bound = True if context=='bound' else False self.DATA_PATH = j(PEPTIDB_DATA_PATH, self.context) self.PDB_DATA_PATH = j(self.DATA_PATH, self.context+'Set', 'mainChain' if bound else '') self.FTMAP_DATA_PATH = j(self.DATA_PATH, 'FTMapAnalysis', 'ftmapData') self.CONSURF_DATA_PATH = j(self.DATA_PATH, 'ConSurfAnalysis', 'data') self.receptor_filename = j(self.PDB_DATA_PATH, '%s.pdb' % self.pdbid) self.receptor_atoms = prody.parsePDB(self.receptor_filename).protein self.receptor_chain = self.receptor_atoms.getHierView().iterChains().next() #print "###%s###" % self.receptor_filename self.resnum_index = pd.MultiIndex.from_tuples( zip( [pdbid]*self.receptor_chain.numResidues(), self.receptor_chain.ca.getResnums() ), names = [ 'PDB identifier', 'Residue number', ] ) self.df = pd.DataFrame(index=self.resnum_index) print self.pdbid
def prody_biomol(pdbname,**kwargs): """Generate biomolecule coordinates. :arg pdb: :term:`PDB` identifier or filename :arg prefix: prefix for output files, default is :file:`_biomol` :arg biomol: index of the biomolecule, by default all are generated""" import prody LOGGER = prody.LOGGER prefix, biomol = kwargs.get('prefix',None), kwargs.get('biomol') pdb, header = prody.parsePDB(pdbname, header=True) if not prefix: prefix = pdb.getTitle() biomols = prody.buildBiomolecules(header, pdb, biomol=biomol) if not isinstance(biomols, list): biomols = [biomols] for i, biomol in enumerate(biomols): if isinstance(biomol, prody.Atomic): outfn = '{0:s}_biomol_{1:d}.pdb'.format(prefix, i+1) LOGGER.info('Writing {0:s}'.format(outfn)) prody.writePDB(outfn, biomol) elif isinstance(biomol, tuple): for j, part in enumerate(biomol): outfn = ('{0:s}_biomol_{1:d}_part_{2:d}.pdb' .format(prefix, i+1, j+1)) LOGGER.info('Writing {0:s}'.format(outfn)) prody.writePDB(outfn, part)
def main(): import sys import getopt import csv import prody as pr #usage = \ """ Copyright (c) 2007 Bosco Ho Calculates the total Accessible Surface Area (ASA) of atoms in a PDB file. Usage: asa.py -s n_sphere in_pdb [out_pdb] - out_pdb PDB file in which the atomic ASA values are written to the b-factor column. -s n_sphere number of points used in generating the spherical dot-density for the calculation (default=960). The more points, the more accurate (but slower) the calculation. """ #opts, args = getopt.getopt(sys.argv[1:], "n:") #if len(args) < 1: # print usage # return #mol = molecule.Molecule(args[0]) #pdb = molecule.Molecule('dimers/1R0R.pdb') pdb = pr.parsePDB('dimers/1R0R.pdb') #atoms = mol.atoms() #molecule.add_radii(atoms) data = [] #for o, a in opts: # if '-n' in o: # n_sphere = int(a) # print "Points on sphere: ", n_sphere # #n_sphere = [500] n_sphere = range(10,2000,10) for n in n_sphere: asas = calculate_asa_np(pdb, 1.4, n) data.append(asas) #print "%i, %.1f angstrom squared." % n, sum(asas) print(str(n) + ", " + str(sum(asas)) + " angstrom squared.") f_test = open('perturbation_analysis.csv','w') c = csv.writer(f_test) for i in xrange(len(data)): c.writerow(data[i])
def computeEachAtomAllTrajectoriesMean(trajectories): """ Computes the mean of each atom's position in all the trajectories """ ri = [] riMeasures = [] for i, traj in enumerate(trajectories): #trajectory = prody.parsePDB(traj) trajectory = prody.parsePDB(traj, subset='calpha') coordinates = trajectory.getCoordsets() ensembleTrajectory = prody.PDBEnsemble("Complex") ensembleTrajectory.setAtoms(trajectory) ensembleTrajectory.addCoordset(coordinates[INITIAL_FRAME:]) ensembleTrajectory.setCoords(coordinates[0]) #reference ensembleTrajectory.superpose() #ensembleTrajectory = trajectory sri, sriMeasures = addTrajectoryCoordinates(ensembleTrajectory.getCoordsets(), trajectory.numAtoms()) #sri, sriMeasures = addTrajectoryCoordinates(trajectory.getCoordsets(), trajectory.numAtoms()) ri.append(sri) riMeasures.append(sriMeasures) return average(ri, riMeasures)
def computeEachAtomsUnnormalisedAutocorrelation(trajectories, avgR): """ Computes the autocorrelation with the formula: C(k) = 1/[(n-k)] \sum_{t=1}^{n-k} (Xt - mu)(Xt+k - mu) To normalise it wihitn [-1:1] c(k) = C(k) / var When the true mean \mu and variance \sigma^2 are known, this estimate is unbiased. """ rirj = [] rirjMeasures = [] for i, traj in enumerate(trajectories): #trajectory = prody.parsePDB(traj) trajectory = prody.parsePDB(traj, subset='calpha') coordinates = trajectory.getCoordsets() #superpose ensembleTrajectory = prody.PDBEnsemble("Complex") ensembleTrajectory.setAtoms(trajectory) ensembleTrajectory.addCoordset(coordinates[INITIAL_FRAME:]) ensembleTrajectory.setCoords(coordinates[0]) #reference ensembleTrajectory.superpose() #ensembleTrajectory = trajectory srirj, sMeasures = computeEachAtomsUnnormalisedAutocorrelationForASingleTrajectory(ensembleTrajectory.getCoordsets(), avgR) rirj.append(srirj) rirjMeasures.append(sMeasures) rirj = sumOverTrajectories(rirj) rirjMeasures = sumOverTrajectories(rirjMeasures) return rirj/rirjMeasures
def find_close(native_name, traj_name, skip_frames):
    native = prody.parsePDB(native_name)
    traj = prody.parsePDB(traj_name)

    ensemble = prody.Ensemble('ensemble')
    ensemble.setCoords(native.getCoords())
    ensemble.addCoordset(traj.getCoordsets()[skip_frames:, ...])  # skip the first skip_frames frames
    ensemble.superpose()

    native_coords = native.getCoords()
    ensemble_coords = ensemble.getCoordsets()

    diff2 = (ensemble_coords - native_coords) ** 2
    diff2 = numpy.sum(diff2, axis=2)
    min_dev = numpy.min(diff2, axis=0)

    return numpy.sqrt(numpy.sum(min_dev) / float(min_dev.shape[0]))
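# Hedged usage sketch for find_close above: the trajectory PDB is expected to hold
# multiple models with the same atoms as the native structure; the first skip_frames
# models are ignored before superposition. File names are placeholders.
closest_approach = find_close("native.pdb", "trajectory.pdb", skip_frames=10)
print("per-atom closest-approach deviation: %.2f" % closest_approach)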
def generate_neighborhood_atom_list(input_pdbs, neighbors, acceptable_atoms_wt_set, acceptable_atoms_mut_set, input_type): coordinates = [] for input_pdb in input_pdbs: if 'WT.' not in input_pdb: atom_list = [] neighborhood = prody.parsePDB(input_pdb) neighborhood_hv = neighborhood.getHierView() res_list = [neighborhood_hv[neighbor[1], neighbor[0][1]] for neighbor in neighbors] for res in res_list: # Check if numbering should be for WT or Mutant # Check that residues are present in acceptable_residues # Check that atom coordinates are present in acceptable_atoms for atom in res: if input_type == 'Mutant PDB': # print (mut_to_wt_chains[res.getChid()], res.getResname(), # int(residue_maps_reverse[(res.getChid(), mut_to_wt_chains[res.getChid()])][ # '%s %s ' % (res.getChid(), (' ' + str(res.getResnum()))[-3:])].split()[1]), # atom.getName()) if (res.getChid(), res.getResname(),int(res.getResnum()), atom.getName()) in acceptable_atoms_mut_set: if atom.getElement() != 'H': atom_list.append(str(atom.getIndex())) else: if (res.getChid(), res.getResname(), res.getResnum(), atom.getName()) in acceptable_atoms_wt_set: if atom.getElement() != 'H': atom_list.append(str(atom.getIndex())) coordinates.append(neighborhood.select('index ' + ' '.join(atom_list))) return coordinates
def generate_point_atom_list(input_pdbs, mutations, acceptable_atoms_wt_set, acceptable_atoms_mut_set, mut_key_dict, input_type): mutation_dict = {} for counter, mutation in enumerate(mutations): temp = [] for input_pdb in input_pdbs: if 'WT.' not in input_pdb: atom_list = [] point_mutant = prody.parsePDB(input_pdb) point_mutant_hv = point_mutant.getHierView() res_list = [point_mutant_hv[mutation[1], int(mutation[0])]] for res in res_list: # Check if numbering should be for WT or Mutant # Check that residues are present in acceptable_residues # Check that atom coordinates are present in acceptable_atoms for atom in res: if input_type == 'Mutant PDB': if (res.getChid(), res.getResname(),int(res.getResnum()), atom.getName()) in acceptable_atoms_mut_set: if atom.getElement() != 'H': atom_list.append(str(atom.getIndex())) if input_type == 'RosettaOut': if (res.getChid(), res.getResname(), res.getResnum(), atom.getName()) in acceptable_atoms_wt_set: if atom.getElement() != 'H': atom_list.append(str(atom.getIndex())) temp.append(point_mutant.select('index ' + ' '.join(atom_list))) if input_type == 'Mutant PDB': mutation_dict[mut_key_dict[mutation[1] + str(mutation[0])]] = temp if input_type == 'RosettaOut': mutation_dict[mutation[1] + str(mutation[0])] = temp return mutation_dict
def runThrough(pfile): # initial setup print "Running through " + pfile + "..." numMdls = getNumMdls(pfile) #print numMdls appf = pr.parsePDB(pfile, model=numMdls, secondary=True, chain='A', altLoc=False) los = sheets.initializeList(pfile) parseHelices(appf) if(los != None): parseSheets(appf,los) # get them means COLUMN_D_ON_AV = [sum(x)/len(x) if len(x) > 0 else 0 for x in COLUMN_D_ON] COLUMN_D_OH_AV = [sum(x)/len(x) if len(x) > 0 else 0 for x in COLUMN_D_OH] COLUMN_A_NHO_AV = [sum(x)/len(x) if len(x) > 0 else 0 for x in COLUMN_A_NHO] COLUMN_A_HOC_AV = [sum(x)/len(x) if len(x) > 0 else 0 for x in COLUMN_A_HOC] COLUMN_BETA_AV = [sum(x)/len(x) if len(x) > 0 else 0 for x in COLUMN_BETA] COLUMN_GAMMA_AV = [sum(x)/len(x) if len(x) > 0 else 0 for x in COLUMN_GAMMA] # get the std devs, nasty shit COLUMN_D_ON_STD = [np.std(x) if len(x) > 0 else 0 for x in COLUMN_D_ON] COLUMN_D_OH_STD = [np.std(x) if len(x) > 0 else 0 for x in COLUMN_D_OH] COLUMN_A_NHO_STD = [np.std(x) if len(x) > 0 else 0 for x in COLUMN_A_NHO] COLUMN_A_HOC_STD = [np.std(x) if len(x) > 0 else 0 for x in COLUMN_A_HOC] COLUMN_BETA_STD = [np.std(x) if len(x) > 0 else 0 for x in COLUMN_BETA] COLUMN_GAMMA_STD = [np.std(x) if len(x) > 0 else 0 for x in COLUMN_GAMMA] TABLE = [COLUMN_D_ON_AV, COLUMN_D_OH_AV, COLUMN_A_NHO_AV, COLUMN_A_HOC_AV, COLUMN_BETA_AV, COLUMN_GAMMA_AV] STDS = [COLUMN_D_ON_STD, COLUMN_D_OH_STD, COLUMN_A_NHO_STD, COLUMN_A_HOC_STD, COLUMN_BETA_STD, COLUMN_GAMMA_STD] #print ' D_ON D_OH ANGLE(NHO) ANGLE(HOC) BETA GAMMA ' #print np.array(TABLE).T #print np.array(STDS).T return (TABLE,STDS)
def test_sasa_3():
    ag = prody.parsePDB(TEST_DATA / '1atom.pdb')
    sasa = calc_sasa(ag, normalize=False, change_radii={'N': 0.3}, probe_radius=0.001)
    assert abs(sasa[0] - 4 * 3.14 * 0.3**2) < 0.1
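# Worked check (added for clarity) of the expected value in test_sasa_3: for a single
# atom with radius 0.3 and a near-zero probe radius, the accessible surface is just
# the sphere area 4*pi*r**2.
import math
expected_area = 4.0 * math.pi * 0.3 ** 2
print(round(expected_area, 3))  # ~1.131, compared against sasa[0] with a 0.1 tolerance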
def get_voxel(input_path, buffer, width):
    input_mol = parsePDB(input_path)
    input_mol = input_mol.select('element C or element N or element O or element S')
    occus = make_voxel(input_mol=input_mol, buffer=buffer, width=width)
    return occus, input_mol.select('name CA').getResnames(), input_mol.select('name CA').getResnums()
def align_fasta(input_pdb_path, target_fasta_path):
    pdb = parsePDB(input_pdb_path)
    input_fasta_path = tempfile.mktemp(suffix='.fasta')
    f = open(input_fasta_path, 'w')
    f.write('>temp\n')
    if len(pdb.select('name CA').getSequence()) < 25:
        return None, None, None
    else:
        f.write(reduce(lambda a, b: a + b, pdb.select('name CA').getSequence()))
    f.close()

    needle_path = tempfile.mktemp(suffix='.needle')
    cmd = [
        'needle', '-outfile', needle_path, '-asequence', input_fasta_path,
        '-bsequence', target_fasta_path, '-gapopen', '10', '-gapextend', '0.5'
    ]
    subprocess.call(cmd)

    needle_result = list(AlignIO.parse(needle_path, 'emboss'))[0]
    input_seq, target_seq = np.array(list(str(needle_result[0].seq))), np.array(list(str(needle_result[1].seq)))
    input_seq, target_seq = input_seq[np.where(target_seq != '-')], target_seq[np.where(input_seq != '-')]
    input_align_indices = np.where(target_seq != '-')[0]
    target_align_indices = np.where(input_seq != '-')[0]

    align_pdb = pdb.select('resindex ' + reduce(lambda a, b: str(a) + ' ' + str(b), input_align_indices))
    # keep only C/N/O/S heavy atoms in the aligned selection
    align_pdb = align_pdb.select('element C or element N or element O or element S')
    return align_pdb, input_align_indices, target_align_indices
def inline_fitness(pdb_id, verbose): structure = parsePDB(pdb_id) for chain in structure.iterChains(): if not is_rna(chain): continue for residue in chain.iterResidues(): try: fitness = calc_inline_fitness(residue, verbose) except AttributeError: # end of the chain continue # pos = the position *downstream* of the examined # internucleotide. chain_id = chain.getChid() res_num = residue.getResnum() res_id = residue.getResname() fields = (chain_id, res_id, res_num, fitness) print '\t'.join(map(str, fields))
def prody_biomol(opt): """Generate biomolecule coordinates based on command line arguments.""" import prody LOGGER = prody.LOGGER prefix, biomol = opt.prefix, opt.biomol pdb, header = prody.parsePDB(opt.pdb, header=True) if not prefix: prefix = pdb.getTitle() biomols = prody.buildBiomolecules(header, pdb, biomol=biomol) if not isinstance(biomols, list): biomols = [biomols] for i, biomol in enumerate(biomols): if isinstance(biomol, prody.Atomic): outfn = '{0:s}_biomol_{1:d}.pdb'.format(prefix, i+1) LOGGER.info('Writing {0:s}'.format(outfn)) prody.writePDB(outfn, biomol) elif isinstance(biomol, tuple): for j, part in enumerate(biomol): outfn = ('{0:s}_biomol_{1:d}_part_{2:d}.pdb' .format(prefix, i+1, j+1)) LOGGER.info('Writing {0:s}'.format(outfn)) prody.writePDB(outfn, part)
def _generate_sidechains_scwrl(self): if not self.rec is None: rec = self.rec.copy() rec.setChids('A') lig = self._tpl.copy() rec = BasePDB(ag=rec) lig = BasePDB(ag=lig) merged = rec.add_mol(lig, keep_resi=False, keep_chains=True) merged.save(self._mrg_file) self._make_scwrl_sequence_file() call = [ define.SCWRL_EXE, '-h', '-i', self._mrg_file, '-o', self._scw_file, '-s', self._seq_file ] else: prody.writePDB(self._mrg_file, self._tpl) call = [ define.SCWRL_EXE, '-h', '-i', self._mrg_file, '-o', self._scw_file ] # scwrl wants rosetta hydrogen naming BasePDB(self._mrg_file).to_rosetta().save(self._mrg_file) helpers.shell_call(call) pep = prody.parsePDB(self._scw_file) # extract peptide and renumber pep = BasePDB(ag=pep.select('chain B').copy()).renumber( keep_resi=False).ag self.pep = pep
def prody_catdcd(opt): """Concatenate DCD files.""" import prody LOGGER = prody.LOGGER if opt.num: num = [] for dcd in opt.dcd: dcd = prody.DCDFile(dcd) num.append(dcd.numFrames()) for n in num: print(n) print(sum(num)) return align = opt.align ag = opt.psf or opt.pdb if ag: if os.path.splitext(ag)[1].lower() == '.psf': ag = prody.parsePSF(ag) else: ag = prody.parsePDB(ag) elif align: raise ValueError('one of PSF or PDB files must be provided for ' 'align option to work') dcd = opt.dcd traj = prody.Trajectory(dcd.pop(0)) while dcd: traj.addFile(dcd.pop(0)) if ag: traj.setAtoms(ag) select = traj.select(opt.select) LOGGER.info('{0:d} atoms are selected for writing output.' .format(len(select))) if align: _ = traj.select(align) LOGGER.info('{0:d} atoms are selected for aligning frames.' .format(len(_))) out = prody.DCDFile(opt.output, 'w') count = 0 goto = False if opt.stride != 1: goto = True slc = slice(opt.first, opt.last, opt.stride).indices(len(traj)+1) for i in range(*slc): if goto: traj.goto(i) frame = traj.next() if align: frame.superpose() out.write(select._getCoords(), frame.getUnitcell()) else: out.write(frame._getCoords(), frame.getUnitcell()) count += 1 traj.close() out.close() LOGGER.info("{0:d} frames are written into '{1:s}'." .format(count, opt.output))
def combine_structures(directory_with_pdbs, output_filename):
    search_string = join(directory_with_pdbs, '*.pdb')
    path_list = glob(search_string)

    if len(path_list) > MAX_FRAMES:
        raise RuntimeError('Got %d frames, but only up to %d frames are allowed.'
                           % (len(path_list), MAX_FRAMES))
    else:
        pass

    atom_group = parsePDB(path_list[0])
    for i, path in enumerate(path_list):
        if i == 0:
            continue
        else:
            p = parsePDB(path)
            atom_group.addCoordset(p)

    writePDB(output_filename, atom_group)
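# Hedged usage sketch for combine_structures above: every PDB in the (hypothetical)
# frames/ directory must contain identical atoms so the coordinate sets can be
# stacked into one multi-model file; MAX_FRAMES is a module-level limit assumed to
# be defined alongside the function.
combine_structures("frames/", "combined.pdb")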
def load(self,filename): self.filname = filename self.model = prody.parsePDB(self.filname, model=1) print ("self.center",self.center) if self.center : # c = calcCenter(self.model) moveAtoms(self.model, to=numpy.zeros(3)) self.ca_model = self.model.select('protein and name CA')#what about DNA
def prody_align(opt): """Align models in a PDB file or a PDB file onto others.""" import prody LOGGER = prody.LOGGER args = opt.pdb if len(args) == 1: pdb = args[0] LOGGER.info('Aligning multiple models in: ' + pdb) selstr, prefix, model = opt.select, opt.prefix, opt.model pdb = prody.parsePDB(pdb) pdbselect = pdb.select(selstr) if pdbselect is None: LOGGER.warning('Selection "{0:s}" do not match any atoms.' .format(selstr)) sys.exit(-1) LOGGER.info('{0:d} atoms will be used for alignment.' .format(len(pdbselect))) pdb.setACSIndex(model-1) prody.alignCoordsets(pdb, selstr=selstr) rmsd = prody.calcRMSD(pdb) LOGGER.info('Max RMSD: {0:0.2f} Mean RMSD: {1:0.2f}' .format(rmsd.max(), rmsd.mean())) if prefix == '': prefix = pdb.getTitle() + '_aligned' outfn = prefix + '.pdb' LOGGER.info('Writing file: ' + outfn) prody.writePDB(outfn, pdb) else: reffn = args.pop(0) LOGGER.info('Aligning structures onto: ' + reffn) ref = prody.parsePDB(reffn) for arg in args: if arg == reffn: continue if '_aligned.pdb' in arg: continue pdb = prody.parsePDB(arg) if prody.matchAlign(pdb, ref): outfn = pdb.getTitle() + '_aligned.pdb' LOGGER.info('Writing file: ' + outfn) prody.writePDB(outfn, pdb) else: LOGGER.warning('Failed to align ' + arg)
def test_calc_matrix(self): pdb_structure = prody.parsePDB("data/3_models.pdb") expected = [ 35.01002624, 47.60315215, 88.64981522, 32.90471145, 87.13023459, 85.76106107] product_matrix = DihedralRMSDMatrixCalculator.build(pdb_structure) # print "out", product_matrix.get_data() # print "out", product_matrix.get_data() # print product_matrix.get_data()[0] # print product_matrix[0,1] numpy.testing.assert_almost_equal(expected, product_matrix.get_data(),8)
def conservation(self,): consurf_pdb_filename = os.path.join(self.CONSURF_DATA_PATH, self.pdbid, 'pdbFILE_view_ConSurf.pdb') p = prody.parsePDB(consurf_pdb_filename) consurf_chain = p.getHierView().iterChains().next() return pd.DataFrame( columns=['Conservation-score'], index=self.resnum_index, data=consurf_chain.ca.getBetas()[:len(self.resnum_index)], )
def preprocess_pdb(args): pdb_file = args[1] output = "./" + args[2]+"/"+args[2] create_directory("./" + args[2]) cluster_frames = get_frame_numbers(args) pdb = prody.parsePDB(pdb_file) # Get a copy of the pdb coords input_coordsets = numpy.array(pdb.getCoordsets()[cluster_frames]) # Empty pdb pdb.delCoordset(range(pdb.numCoordsets())) # Build another pdb to store it input_pdb = prody.parsePDB(pdb_file) input_pdb.delCoordset(range(input_pdb.numCoordsets())) # And add the chosen coordsets for i in range(len(cluster_frames)): input_pdb.addCoordset(input_coordsets[i]) prody.writePDB(output+"_ini.pdb", input_pdb) print_matrix(input_pdb.select("name CA").getCoordsets(), output) return pdb, input_coordsets, cluster_frames, output
def _from_file(path):
    """
    Load a ProDy AtomGroup from a pdb file.

    Parameters:
        path - a string containing a filepath to a PDB file

    Returns:
        A ProDy AtomGroup
    """
    return prody.parsePDB(path)
def read_naccess_asa(asa_filename):
    '''
    Reads per-atom ASA values from ``asa_filename``.
    Returns a pandas.Series object containing the data.
    '''
    # asa file is a PDB file with ASA values as
    # occupancy and VDW radii as B-factor
    atoms = prody.parsePDB(asa_filename).protein
    atoms_asa = atoms.getOccupancies()
    atoms_asa_series = pd.Series(data=atoms_asa, name='per_atom_asa')
    return atoms_asa_series
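# Hedged usage sketch for read_naccess_asa above: the .asa file is the PDB-formatted
# per-atom output of NACCESS; the path below is a placeholder.
per_atom_asa = read_naccess_asa("protein.asa")
print("total protein ASA: %.1f" % per_atom_asa.sum())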
def setUp(self): self.output = join(TEMPDIR, 'test_prody_catdcd.dcd') self.dcdpath = pathDatafile('dcd') self.pdbpath = pathDatafile('multi_model_truncated') self.dcd = DCDFile(self.dcdpath) self.ag = parsePDB(self.pdbpath, model=1) self.command = 'catdcd -o ' + self.output self.tearDown()
def _find_ligand(self):
    self._get_file_path(ligand=True)
    protein = parsePDB(self.file_path)
    try:
        seq = protein['A'].getSequence()
    except:
        pass
    else:
        ligand = protein.select('not protein and not water')
        repr(ligand)
        if ligand:
            self.out_filename = self.file_path.split('.')[0] + '_ligand.pdb'
            writePDB(self.out_filename, ligand)