def copyDataFromTarget(target, model, labels=('psipred', 'netsurfp')):
    """Copy per-residue data arrays from *target* onto the atoms of *model*.

    Every data label of *target* that starts with one of *labels* is
    transferred.  For each target chain, the model chain with the same chain
    identifier is used; when there is none, the first pair returned by
    ``prody.matchChains`` (``seqid=50, overlap=20``) decides the model chain.
    The value read from a target residue's CA atom is written to all atoms
    of the model residue that has the same residue number.  Model atoms that
    never receive a value keep the zero-initialised default, which for
    string data is replaced with ``'-'``.

    :arg target: object the data are read from
    :arg model: object to annotate; it is modified through ``setData``
    :arg labels: prefixes of the data labels to copy
    :returns: ``None``
    """
    prefixes = tuple(labels)
    data = {}
    for label in target.getDataLabels():
        if label.startswith(prefixes):
            data[label] = np.zeros(model.numAtoms(),
                                   dtype=target.getData(label).dtype)

    # The model's chain identifiers do not change while we iterate, so they
    # are collected once instead of once per target chain.
    model_chids = set(chain.getChid() for chain in model.iterChains())

    for tchain in target.iterChains():
        # try to find matching chain first
        chid = tchain.getChid()
        if chid in model_chids:
            mchain = chid
        else:
            mchain = prody.matchChains(model, target[chid],
                                       seqid=50, overlap=20)[0][0]
            mchain = mchain.copy().getChids()[0]

        for tres in tchain.ca.copy().iterResidues():
            try:
                # The model residue is the same for every label, so it is
                # looked up once per residue.
                indices = model[mchain][tres.getResnum()].getIndices()
                calpha = tres.ca
                for label in data:
                    data[label][indices] = calpha.getData(label)
            except AttributeError:
                # target may have more residues than the model
                continue

    for label, values in data.items():
        # Atoms that received no value hold an empty string.  Byte-string
        # ('S') and unicode ('U') arrays are both handled; the original
        # check covered 'S' only, so unicode arrays kept their blanks.
        kind = values.dtype.char
        if kind == 'S':
            values[values == b''] = b'-'
        elif kind == 'U':
            values[values == u''] = u'-'
        model.setData(label, values)
def fix_openmm():
    """Build ``start.pdb`` from the crystal structure and run tleap on it.

    All file names are fixed and relative to the current directory:

    1. ``experimental.pdb`` is filtered into ``no_smet.pdb``: ATOM records
       are copied, MSE HETATM records are rewritten as MET ATOM records
       (the selenium atom ``SE`` is renamed ``SD``), everything else is
       dropped.
    2. The non-hydrogen atoms of that file are matched against
       ``rosetta.pdb`` with ``prody.matchChains``; the matched part of the
       crystal structure is written to ``chain.pdb``.
    3. PdbFixer is run on ``chain.pdb`` and ``output.pdb`` (presumably
       written by PdbFixer -- confirm) is loaded.
    4. Hydrogens are removed, residues reported by ``find_disulfide`` are
       renamed ``CYX``, ``start.pdb`` is written and ``run_tleap`` is called.

    :raises IndexError: if ``prody.matchChains`` finds no matching chain
    """
    # get the whole crystal structure
    # get only the ATOM records
    # and HETATM records for MSE
    # convert MSE to MET
    with open('no_smet.pdb', 'w') as outfile, open('experimental.pdb') as infile:
        for line in infile:
            if line.startswith('ATOM'):
                outfile.write(line)
            elif line.startswith('HETATM') and line[17:20] == 'MSE':
                # PDB records are fixed-width: columns 1-6 hold the record
                # name, 13-16 the atom name and 17 the alternate-location
                # flag.  The literals must therefore have exactly these
                # widths, otherwise the selenium atom is never recognised
                # and every following column is shifted.
                atom_name = line[12:16]
                alt_loc = line[16:17]
                if atom_name == 'SE  ':
                    atom_name = ' SD '
                # rstrip avoids a doubled newline on lines shorter than 67
                # characters
                tail = line[20:67].rstrip('\r\n')
                outfile.write('ATOM  ' + line[6:12] + atom_name + alt_loc +
                              'MET' + tail + '\n')

    # load the file into prody
    crystal = prody.parsePDB('no_smet.pdb')
    crystal = crystal.select('not hydrogen')

    # get one of the rosetta models
    rosetta = prody.parsePDB('rosetta.pdb')

    # perform an alignment to find out what part of the crystal structure
    # corresponds to the rosetta file
    matches = prody.matchChains(rosetta, crystal, subset='all', overlap=25,
                                pwalign=True)
    if not matches:
        # same exception type as indexing the empty result, but with a
        # message that says what went wrong
        raise IndexError('no chain of experimental.pdb matches rosetta.pdb')
    match = matches[0][1]
    # single-argument print(...) behaves the same as a statement and as a
    # function call
    print(len(match))
    prody.writePDB('chain.pdb', match)

    # now clean it up with pdb fixer
    subprocess.check_call('python ~/Source/PdbFixer/pdbfixer.py chain.pdb',
                          shell=True)

    # now load it with zam
    p = protein.Protein('output.pdb')
    p.Dehydrogen()
    disulfide_pairs = find_disulfide(p)
    for r1, r2 in disulfide_pairs:
        print(' added disulfide between {} and {}'.format(r1, r2))
        p.Res[r1].FullName = 'CYX'
        p.Res[r2].FullName = 'CYX'
    p.WritePdb('start.pdb')

    # now run tleap
    print(' running tleap')
    run_tleap(disulfide_pairs)
def calc_pocket_rmsd(rec, lig, root):
    """Calculate difference between the ligand reference receptor and the
    receptor it is being docked into.

    From original script by David Koes.

    The receptor ``rec`` and the ligand's own receptor (the ``lig`` file
    name with ``LIG_aligned.sdf`` replaced by ``PRO.pdb``) are read from
    ``root``.  Chains are matched with ``prody.matchChains`` at cutoffs
    90, 80, ..., 10 and, for every match, the RMSD is computed over the
    matched atoms of ``rec`` that belong to protein residues within 3.5 of
    any ligand atom.

    :arg rec: receptor PDB file name, relative to ``root``
    :arg lig: ligand SDF file name, relative to ``root``
    :arg root: directory containing both files
    :returns: ``(rmsd, ca_rmsd)`` of the match with the lowest pocket RMSD,
        where ``ca_rmsd`` is restricted to the ``.ca`` atoms; both values
        are ``np.inf`` when no match yields any pocket atoms
    """
    ligrec = lig.replace("LIG_aligned.sdf", "PRO.pdb")
    rec = prody.parsePDB(os.path.join(root, rec))
    ligrec = prody.parsePDB(os.path.join(root, ligrec))
    lig = next(pybel.readfile("sdf", os.path.join(root, lig)))
    c = np.array([a.coords for a in lig.atoms])
    nearby = rec.select("protein and same residue as within 3.5 of point",
                        point=c)

    matches = []
    for cutoff in range(90, 0, -10):
        # can't just set a low cutoff since we'll end up with bad alignments
        # try a whole bunch of alignments to maximize the likelihood we get
        # the right one
        m = prody.matchChains(rec, ligrec, subset="all", overlap=cutoff,
                              seqid=cutoff, pwalign=True)
        if m:
            matches += m

    minrmsd = np.inf
    minbackrmsd = np.inf
    if nearby is None:
        # Nothing was selected around the ligand, so no match can contribute
        # pocket atoms.  Previously this case was only handled implicitly,
        # by swallowing the resulting AttributeError for every match.
        return minrmsd, minbackrmsd

    # The pocket is the same for every match; build the lookup set once.
    closeatoms = set(nearby.getIndices())

    for rmap, lrmap, _, _ in matches:
        try:
            rec_indices = rmap.getIndices()
            lig_indices = lrmap.getIndices()
            picked = [i for i, idx in enumerate(rec_indices)
                      if idx in closeatoms]
            if not picked:
                continue
            ratoms = [rec_indices[i] for i in picked]
            lratoms = [lig_indices[i] for i in picked]
            rmsd = prody.calcRMSD(rec[ratoms], ligrec[lratoms])
            backrmsd = prody.calcRMSD(rec[ratoms] & rec.ca,
                                      ligrec[lratoms] & ligrec.ca)
        except Exception:
            # Best effort: a match that cannot be evaluated is skipped.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
        if rmsd < minrmsd:
            minrmsd = rmsd
            minbackrmsd = backrmsd
    return minrmsd, minbackrmsd
def fix_openmm():
    """Build ``start.pdb`` from the crystal structure and run tleap on it.

    NOTE(review): the file defines ``fix_openmm`` twice; this later
    definition is the one bound to the name after import.

    All file names are fixed and relative to the current directory:

    1. ``experimental.pdb`` is filtered into ``no_smet.pdb``: ATOM records
       are copied, MSE HETATM records are rewritten as MET ATOM records
       (the selenium atom ``SE`` is renamed ``SD``), everything else is
       dropped.
    2. The non-hydrogen atoms of that file are matched against
       ``rosetta.pdb`` with ``prody.matchChains``; the matched part of the
       crystal structure is written to ``chain.pdb``.
    3. PdbFixer is run on ``chain.pdb`` and ``output.pdb`` (presumably
       written by PdbFixer -- confirm) is loaded.
    4. Hydrogens are removed, residues reported by ``find_disulfide`` are
       renamed ``CYX``, ``start.pdb`` is written and ``run_tleap`` is called.

    :raises IndexError: if ``prody.matchChains`` finds no matching chain
    """
    # get the whole crystal structure
    # get only the ATOM records
    # and HETATM records for MSE
    # convert MSE to MET
    with open('no_smet.pdb', 'w') as outfile, open('experimental.pdb') as infile:
        for line in infile:
            if line.startswith('ATOM'):
                outfile.write(line)
            elif line.startswith('HETATM') and line[17:20] == 'MSE':
                # PDB records are fixed-width: columns 1-6 hold the record
                # name, 13-16 the atom name and 17 the alternate-location
                # flag.  The literals must therefore have exactly these
                # widths, otherwise the selenium atom is never recognised
                # and every following column is shifted.
                atom_name = line[12:16]
                alt_loc = line[16:17]
                if atom_name == 'SE  ':
                    atom_name = ' SD '
                # rstrip avoids a doubled newline on lines shorter than 67
                # characters
                tail = line[20:67].rstrip('\r\n')
                outfile.write('ATOM  ' + line[6:12] + atom_name + alt_loc +
                              'MET' + tail + '\n')

    # load the file into prody
    crystal = prody.parsePDB('no_smet.pdb')
    crystal = crystal.select('not hydrogen')

    # get one of the rosetta models
    rosetta = prody.parsePDB('rosetta.pdb')

    # perform an alignment to find out what part of the crystal structure
    # corresponds to the rosetta file
    matches = prody.matchChains(rosetta, crystal, subset='all', overlap=25,
                                pwalign=True)
    if not matches:
        # same exception type as indexing the empty result, but with a
        # message that says what went wrong
        raise IndexError('no chain of experimental.pdb matches rosetta.pdb')
    match = matches[0][1]
    # single-argument print(...) behaves the same as a statement and as a
    # function call
    print(len(match))
    prody.writePDB('chain.pdb', match)

    # now clean it up with pdb fixer
    subprocess.check_call('python ~/Source/PdbFixer/pdbfixer.py chain.pdb',
                          shell=True)

    # now load it with zam
    p = protein.Protein('output.pdb')
    p.Dehydrogen()
    disulfide_pairs = find_disulfide(p)
    for r1, r2 in disulfide_pairs:
        print(' added disulfide between {} and {}'.format(r1, r2))
        p.Res[r1].FullName = 'CYX'
        p.Res[r2].FullName = 'CYX'
    p.WritePdb('start.pdb')

    # now run tleap
    print(' running tleap')
    run_tleap(disulfide_pairs)
def calcS2(model_list, S2_records, S2_type, fit, fit_range):
    """Return a dictionary with the average S2 values.

    ``S2_calced[residue] = value``

    :arg model_list: sequence of models; each is iterated atom by atom and,
        when *fit* is set, superposed onto the first one
    :arg S2_records: records whose ``resnum`` attribute selects the
        residues to evaluate
    :arg S2_type: name of the heavy atom of the bond vector, ``'N'`` or
        ``'CA'`` (paired with ``'H'`` / ``'HA'``)
    :arg fit: superpose the models first, unless ``PDB_model.is_fitted`` is
        already set
    :arg fit_range: ``'start-end'`` string restricting the fit, or a false
        value to fit on all matched atoms
    :returns: dict mapping residue number to S2
    :raises KeyError: if a model has no bond vector for a requested residue
    """
    # fitting models
    reference = model_list[0]
    if fit and not PDB_model.is_fitted:
        print("Start FITTING")
        for model_idx in range(1, len(model_list)):
            mobile = model_list[model_idx]
            matches = prody.matchChains(reference, mobile)
            match = matches[0]
            ref_chain = match[0]
            mob_chain = match[1]
            # ``np.int`` was only an alias of the builtin ``int`` and has
            # been removed from NumPy; the builtin gives the same dtype.
            if fit_range:
                weights = np.zeros((len(ref_chain), 1), dtype=int)
                fit_start, fit_end = fit_range.split('-')
                # NOTE(review): the range end is exclusive, so position
                # ``fit_end`` itself keeps weight 0 -- confirm intended.
                for pos in range(int(fit_start) - 1, int(fit_end) - 1):
                    weights[pos] = 1
            else:
                weights = np.ones((len(ref_chain), 1), dtype=int)
            t = prody.calcTransformation(mob_chain, ref_chain, weights)
            t.apply(mobile)
        PDB_model.is_fitted = True

    # get bond vectors from models (model_data[] -> vectors{resnum : vector})
    model_data = []
    s2_pairs = {'N': 'H', 'CA': 'HA'}
    partner_name = s2_pairs[S2_type]
    for model in model_list:
        current_Resindex = 1
        has_first, has_second = False, False
        vectors = {}
        for atom in model:
            # why not .getResnum() ???
            atom_res = atom.getResindex() + 1
            if atom_res != current_Resindex:
                # entering a new residue: forget atoms seen so far
                current_Resindex = atom_res
                has_first, has_second = False, False
            atom_name = atom.getName()
            if atom_name == S2_type:
                has_second = True
                heavy_coords = Vec_3D(atom.getCoords())
            elif atom_name == partner_name:
                has_first = True
                partner_coords = Vec_3D(atom.getCoords())
            if has_first and has_second:
                has_first, has_second = False, False
                vectors[atom_res] = Vec_3D(
                    heavy_coords - partner_coords).normalize()
        model_data.append(vectors)

    S2_calced = {}
    n_models = len(model_data)
    # iterating over STR records
    for resnum in [int(s2rec.resnum) for s2rec in S2_records]:
        x2, y2, z2, xy, xz, yz = 0, 0, 0, 0, 0, 0
        # iterating over PDB models
        for vectors in model_data:
            # coordinates in model at a given resnum
            v = vectors[resnum].v
            x, y, z = v[0], v[1], v[2]
            x2 += x**2
            y2 += y**2
            z2 += z**2
            xy += x * y
            xz += x * z
            yz += y * z
        x2 /= n_models
        y2 /= n_models
        z2 /= n_models
        xy /= n_models
        xz /= n_models
        yz /= n_models
        # S2 calculation
        s2 = 3 / 2.0 * (x2**2 + y2**2 + z2**2 +
                        2 * xy**2 + 2 * xz**2 + 2 * yz**2) - 0.5
        S2_calced[resnum] = s2
    return S2_calced
def s2_values(
    model_data, calculate_on_models, s2_records, s2_type, fit, fit_range
):
    """Return a dictionary with the average S2 values.

    ``s2_calced[residue] = value``

    :arg model_data: object whose ``atomgroup`` attribute holds the models
        as coordinate sets
    :arg calculate_on_models: coordinate-set indices to evaluate
    :arg s2_records: records whose ``resnum`` attribute selects the
        residues to evaluate
    :arg s2_type: name of the heavy atom of the bond vector, ``"N"`` or
        ``"CA"`` (paired with ``"H"`` / ``"HA"``)
    :arg fit: align the coordinate sets on ``atomgroup.calpha`` first
    :arg fit_range: ``"start-end"`` string; when given together with *fit*,
        each selected model is additionally fitted with weights restricted
        to that range
    :returns: dict mapping residue number to S2
    :raises KeyError: if a model has no bond vector for a requested residue
    """
    if fit:
        reference = model_data.atomgroup[:]
        model_data.atomgroup.setACSIndex(0)
        prody.alignCoordsets(model_data.atomgroup.calpha)
        if fit_range:
            for model_num in calculate_on_models:
                model_data.atomgroup.setACSIndex(model_num)
                mobile = model_data.atomgroup[:]
                matches = prody.matchChains(reference, mobile)
                match = matches[0]
                ref_chain = match[0]
                mob_chain = match[1]
                # ``np.int`` was only an alias of the builtin ``int`` and
                # has been removed from NumPy; the builtin gives the same
                # dtype.
                weights = np.zeros((len(ref_chain), 1), dtype=int)
                fit_start, fit_end = fit_range.split("-")
                # NOTE(review): the range end is exclusive, so position
                # ``fit_end`` itself keeps weight 0 -- confirm intended.
                for pos in range(int(fit_start) - 1, int(fit_end) - 1):
                    weights[pos] = 1
                t = prody.calcTransformation(mob_chain, ref_chain, weights)
                t.apply(mobile)

    # get bond vectors from models (vector_data[] -> vectors{resnum : vector})
    vector_data = []
    s2_pairs = {"N": "H", "CA": "HA"}
    partner_name = s2_pairs[s2_type]
    partner_coords = None
    heavy_coords = None
    for model_num in calculate_on_models:
        model_data.atomgroup.setACSIndex(model_num)
        current_resindex = 1
        has_first, has_second = False, False
        vectors = {}
        for atom in model_data.atomgroup:
            atom_res = atom.getResnum()
            if atom_res != current_resindex:
                # entering a new residue: forget atoms seen so far
                current_resindex = atom_res
                has_first, has_second = False, False
            atom_name = atom.getName()
            if atom_name == s2_type:
                has_second = True
                heavy_coords = Vec3D(atom.getCoords())
            elif atom_name == partner_name:
                has_first = True
                partner_coords = Vec3D(atom.getCoords())
            if has_first and has_second:
                has_first, has_second = False, False
                vectors[atom_res] = Vec3D(
                    heavy_coords - partner_coords
                ).normalize()
        vector_data.append(vectors)

    s2_calced = {}
    n_models = len(vector_data)
    # iterating over STR records
    for resnum in [int(s2rec.resnum) for s2rec in s2_records]:
        x2, y2, z2, xy, xz, yz = 0, 0, 0, 0, 0, 0
        # iterating over PDB models
        for vectors in vector_data:
            # coordinates in model at a given resnum
            v = vectors[resnum].v
            x, y, z = v[0], v[1], v[2]
            x2 += x ** 2
            y2 += y ** 2
            z2 += z ** 2
            xy += x * y
            xz += x * z
            yz += y * z
        x2 /= n_models
        y2 /= n_models
        z2 /= n_models
        xy /= n_models
        xz /= n_models
        yz /= n_models
        s2 = (
            3 / 2.0 * (
                x2 ** 2 + y2 ** 2 + z2 ** 2
                + 2 * xy ** 2 + 2 * xz ** 2 + 2 * yz ** 2
            )
            - 0.5
        )
        s2_calced[resnum] = s2
    return s2_calced
def align():
    """Dock the ligand of every challenge entry with smina and collect results.

    Works below ``<wd>/challengedata`` (``wd`` is a module-level global):

    * creates ``<wd>/challengedata/answers`` if missing and, if
      ``<wd>/challengedata/rdkit-scripts`` is missing, runs ``git clone`` for
      dkoes/rdkit-scripts (the clone goes to the current working directory);
    * for every entry ``x`` of ``challengedata`` that is not one of the
      bookkeeping names, and every entry ``y`` of ``x`` that is not a
      readme/tsv file and has no ``lmcss_docked.sdf`` under
      ``<cwd>/answers/x/y`` yet:
      writes the ATOM lines of the ``LMCSS*.pdb`` receptor to
      ``lmcss_rec.pdb``, converts ``*.smi`` to ``lig.sdf`` with
      ``rdconf.py``, docks with ``smina`` into ``lmcss_docked.sdf``, runs
      ``obrms``, appends a line to ``visual.txt`` and copies the docked file
      into the answers directory.

    The function relies on ``os.chdir`` throughout, so the statement order
    matters.

    NOTE(review): the nesting below is a best-effort reconstruction, the
    original layout is not recoverable from the source text; lines starting
    with ``##`` are read as disabled code -- confirm both.
    NOTE(review): ``input`` and ``id`` shadow builtins inside this function.
    """
    global wd
    ans = wd + '/challengedata/answers'
    if os.path.isdir(ans) == False:  # if the answers directory doesn't exist yet, create it
        os.mkdir(wd + '/challengedata/answers')
    rddir = wd + '/challengedata/rdkit-scripts'
    if os.path.isdir(rddir) == False:
        a = 'git clone https://github.com/dkoes/rdkit-scripts'
        os.system(a)
    data = os.listdir(wd + '/challengedata')
    for x in (data):  # for each week's data
        if x == "readme.txt" or x == "latest.txt" or x == "answers" or x == "rdkit-scripts" or x == 'PDBfiles' or x == 'visual.txt':
            pass
        else:
            toDir = wd + '/challengedata/answers/' + x
            if os.path.isdir(toDir) == False:  # if the path to the answers dir doesn't exist
                os.mkdir(toDir)  # make directory
            dock = os.listdir(wd + '/challengedata/' + x)
            for y in (dock):
                # result file that marks this entry as already docked
                a = str(os.getcwd() + '/answers/' + x + '/' + y + '/lmcss_docked.sdf')
                if y == 'readme.txt' or y == 'new_release_structure_sequence_canonical.tsv' or y == 'new_release_structure_nonpolymer.tsv' or y == 'new_release_crystallization_pH.tsv' or y == 'new_release_structure_sequence.tsv':
                    pass
                elif (os.path.isfile(a) == True):
                    pass
                else:
                    input = os.listdir(wd + '/challengedata/' + x + '/' + y)
                    for z in (input):
                        if z.startswith("LMCSS") and z.endswith(".pdb"):
                            if (z.endswith("lig.pdb")):
                                pass
                            else:
                                # NOTE(review): str.strip removes any of the
                                # characters '.', 'p', 'd', 'b' from both
                                # ends, not the '.pdb' suffix
                                id = z.strip('.pdb')
                                sts = str("grep ATOM " + z + " > lmcss_rec.pdb")  # creates receptor .pdb file
                                cd = wd + '/challengedata'
                                os.chdir(cd + '/' + x + '/' + y)  # change directory to week/ligand
                                os.system(sts)  # runs and creates receptor .pdb file
                                os.chdir(cd)  # back to challenge directory
                    # NOTE(review): ``cd`` is only assigned inside the loop
                    # above (or left over from an earlier entry)
                    input = os.listdir(cd + '/' + x + '/' + y)  # lists files inside ligand in certain week
                    for z in (input):
                        if z.endswith(".smi"):  # changes .smi -> lig.sdf
                            cd = str(os.getcwd())
                            sts = str(" " + cd + '/' + x + '/' + y + '/' + z + " lig.sdf --maxconfs 1")
                            os.chdir(cd + '/' + x + '/' + y)
                            os.system(cd + '/rdkit-scripts/rdconf.py' + sts)
                            os.chdir(cd)
                    for z in (input):  # runs smina
                        if z.endswith("lig.pdb"):
                            sts = str("smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand " + z + " -o " + id + "_docked.sdf")
                            cd = str(os.getcwd())  # ligand directory
                            os.chdir(cd + '/' + x + '/' + y)
                            #os.system(sts)
                            sts = str("smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand " + z + " -o lmcss_docked.sdf")
                            # NOTE(review): cwd was changed two lines above,
                            # so ``cd`` now names the ligand directory and
                            # the next chdir appends x/y to it again
                            cd = str(os.getcwd())  # ligand directory
                            os.chdir(cd + '/' + x + '/' + y)
                            os.system(sts)
                            os.chdir(cd)
                    cur = str(os.getcwd() + '/answers/' + x + '/' + y)
                    if (os.path.isdir(cur) == True):
                        os.chdir(cd + '/' + x + '/' + y)
                        os.getcwd()  # return value is unused
                        ## input = os.listdir(cd + '/' + x + '/' + y)
                        for i in (input):
                            if i.endswith(".txt") and i != "center.txt" and i != "visual.txt":
                                # NOTE(review): this file handle is never closed
                                f = open(i)
                                lines = f.readlines()
                                # NOTE(review): strip() removes the characters
                                # of 'ligand, ' from both ends, not the prefix
                                ligand = lines[2].strip('ligand, ')
                                ligand = ligand.replace('\n', '')
                                ligand = str(ligand)  # gets the ligand from txt file
                            if i.endswith("lig.pdb"):  # see if pdb exists
                                prody.fetchPDB(y)
                                proteinPDB = prody.parsePDB(y)
                                ourPDB = prody.parsePDB('lmcss_rec.pdb')
                                a, b, seqid, overlap = prody.matchChains(proteinPDB, ourPDB)[0]
                                b, protein_sp = prody.superpose(b, a, weights=None)
                                # NOTE(review): a file name is passed as the
                                # selection string and the result is unused
                                b.select(ligand + '_ligand.pdb')
                                sts = str("obrms -f " + i + ' ' + id + "_docked.sdf")  # run obrms
                                # parse results and output to the visualization txt file
                                os.system(sts)
                                f = open('visual.txt', 'ab+')
                                # NOTE(review): writes str to a file opened in
                                # binary mode -- raises TypeError on Python 3
                                f.write(x + ' smina ' + y + '\n')
                                # NOTE(review): ``f.close`` is not called
                                # (missing parentheses)
                                f.close
                                curdir = str(cd + '/' + x + '/' + y + '/' + id + '_docked.sdf')
                                print(input)
                            ## for i in (input):
                            if i.endswith("lig.pdb"):  # see if pdb exists
                                protein = prody.fetchPDB(y)
                                #NEED NUMPY ARRAY
                                # NOTE(review): ``array`` is not assigned
                                # anywhere in this function
                                prody.writeArray('lmcss_docked_array.sdf', array)
                                # NOTE(review): superpose is given a file name
                                # string and the fetchPDB result -- confirm
                                prody.superpose('lmcss_docked.sdf', protein, weights=None)
                                sts = str("obrms -f " + i + " lmcss_docked.sdf")  # run obrms
                                # parse results and output to the visualization txt file
                                os.system(sts)
                                os.chdir(wd + '/challengedata/')
                                f = open('visual.txt', 'ab+')
                                f.write(x + ' smina ' + y + '\n')
                                f.close
                                curdir = str(cd + '/' + x + '/' + y + '/lmcss_docked.sdf')
                                todir = str(cd + '/answers/' + x + '/' + y + '/')
                                shutil.copy(curdir, todir)
                                print(curdir)
                                # only the first ligand file is processed
                                break
                        os.chdir(wd)
                    else:
                        # answers directory for this entry does not exist yet
                        os.mkdir(cur)
                        os.chdir(cd + '/' + x + '/' + y)
                        input = os.listdir(cd + '/' + x + '/' + y)
                        for i in (input):
                            if i.endswith(".txt") and i != "center.txt" and i != "visual.txt":
                                f = open(i)
                                lines = f.readlines()
                                ligand = lines[2].strip("ligand, ")
                                ligand = ligand.replace('\n', '')
                                ligand = str(ligand)  # gets ligand from txt file
                            if i.endswith("lig.pdb"):
                                prody.fetchPDB(y)
                                proteinPDB = prody.parsePDB(y)
                                ourPDB = prody.parsePDB('lmcss_rec.pdb')
                                # result of the chain matching is not used
                                prody.matchChains(proteinPDB, ourPDB)
                                protein_sp = prody.superpose(ourPDB, proteinPDB, weights=None)
                                protein_sp.select(ligand + '_ligand.pdb')
                                sts = str("obrms -f " + i + ' ' + id + "_docked.sdf")
                                os.system(sts)
                                f = open('visual.txt', 'ab+')
                                f.write(x + ' smina ' + y + '\n')
                                # NOTE(review): ``f.close`` is not called
                                # (missing parentheses)
                                f.close
                                curdir = str(cd + '/' + x + '/' + y + '/' + id + '_docked.sdf')
                            if i.endswith("lig.pdb"):
                                protein = prody.fetchPDB(y)
                                # NOTE(review): ``array`` is not assigned
                                # anywhere in this function
                                prody.writeArray('lmcss_docked_array.sdf', array)
                                prody.superpose('lmcss_docked.sdf', protein, weights=None)
                                sts = str("obrms -f " + i + " lmcss_docked.sdf")
                                os.system(sts)
                                os.chdir(wd + '/challengedata/')
                                f = open('visual.txt', 'ab+')
                                f.write(x + ' smina ' + y + '\n')
                                f.close
                                curdir = str(cd + '/' + x + '/' + y + '/lmcss_docked.sdf')
                                todir = str(cd + '/answers/' + x + '/' + y + '/')
                                shutil.copy(curdir, todir)
                                print(curdir)
                                # only the first ligand file is processed
                                break
                        os.chdir(wd)