def findCuspStruct(pdb_a, pdb_b, ensemble_ref, m=m):
    """Return the interpolated frame where the two energy rows are closest.

    A copy of *pdb_b* is superposed onto a copy of *pdb_a*, and ``m`` frames
    are generated along the straight line between the two coordinate sets.
    The interpolation weight runs from 1 down to 0, so the first frame holds
    the superposed *pdb_b* coordinates and the last frame holds the *pdb_a*
    coordinates.  The frames are scored with ``calcMultiStateEnergy`` against
    *ensemble_ref* (using the module-level ``r_c`` and ``k``), the result is
    divided by its maximum, and the frame minimising the absolute difference
    between rows 0 and 1 of that result is selected.

    :arg pdb_a: structure providing the ensemble atoms and reference coords
    :arg pdb_b: structure that is superposed onto *pdb_a*
    :arg ensemble_ref: passed through unchanged to ``calcMultiStateEnergy``
    :arg m: number of interpolation frames
    :returns: ``(coords, diff)`` -- coordinates of the selected frame and the
        absolute energy difference at that frame
    """
    path = pd.Ensemble()
    path.setAtoms(pdb_a)
    path.setCoords(pdb_a.getCoords())

    start = pdb_a.copy()
    end, _ = pd.superpose(pdb_b.copy(), start)
    start_xyz = start.getCoords()
    displacement = end.getCoords() - start_xyz

    for frac in np.linspace(0, 1, m):
        path.addCoordset((1 - frac) * displacement + start_xyz)

    energies = calcMultiStateEnergy(path, ensemble_ref, cutoff=r_c, k=k)
    energies = energies / np.max(energies)
    gap = abs(energies[0, :] - energies[1, :])
    crossing = np.argmin(gap)
    return (path[crossing].getCoords(), gap[crossing])
def get_single_rmsd(reference, model):
    """Return the RMSD between *model* and *reference* after superposition.

    The atoms matching ``'backbone or name OC2'`` are selected from both
    structures, the *model* selection is superposed onto the *reference*
    selection, and the RMSD between the two selections is returned.
    """
    selection = 'backbone or name OC2'
    target = reference.select(selection)
    mobile = model.select(selection)
    prody.superpose(mobile, target)
    return prody.calcRMSD(mobile, target)
def calc(i, j):
    """Return the RMSD between *i* and *j* once *j* is superposed onto *i*."""
    fitted = prody.superpose(j, i)[0]
    return prody.calcRMSD(i, fitted)
def prody_align(*pdbs, **kwargs):
    """Align models in a PDB file or multiple structures in separate PDB files.
    By default, protein chains will be matched based on selected atoms and
    alignment will be performed based on matching residues.  If non-protein
    atoms are selected and selected atoms match in multiple structures,
    they will be used for alignment.

    When a single PDB is given, its coordinate sets are aligned onto the
    reference model and the result is written to one file.  When several are
    given, the first one is the reference and every other structure is
    aligned onto it and written to its own file.

    :arg pdbs: PDB identifier(s) or filename(s)

    :arg select: atom selection string, default is :term:`calpha`,
        see :ref:`selections`

    :arg model: for NMR files, reference model index, default is ``1``

    :arg seqid: percent sequence identity, default is ``90``

    :arg overlap: percent sequence overlap, default is ``90``

    :arg prefix: prefix for output file, default is PDB filename

    :arg suffix: output filename suffix, default is :file:`_aligned`

    :raises ValueError: if the selection matches no atoms in the single
        input file (and no *subparser* was supplied) or in the reference
        structure"""

    from numpy import all
    from prody import LOGGER, writePDB, parsePDB
    from prody import alignCoordsets, printRMSD, matchAlign, superpose

    def _option(name, default):
        # A missing key and an explicit ``None`` are treated alike so the
        # defaults documented above apply when this is called as a plain
        # function, not only through the command-line parser.
        value = kwargs.get(name)
        return default if value is None else value

    selstr = _option('select', 'calpha')
    suffix = _option('suffix', '_aligned')
    if len(pdbs) == 1:
        pdb = pdbs[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        prefix = kwargs.get('prefix')
        model = _option('model', 1)
        pdb = parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            subparser = kwargs.get('subparser')
            if subparser:
                subparser.error('Selection {0} do not match any atoms.'.format(
                    repr(selstr)))
            else:
                raise ValueError('select does not match any atoms')
        LOGGER.info('{0} atoms will be used for alignment.'.format(
            len(pdbselect)))
        # Model numbers are 1-based, coordinate set indices are 0-based.
        pdbselect.setACSIndex(model - 1)
        printRMSD(pdbselect, msg='Before alignment ')
        alignCoordsets(pdbselect)
        printRMSD(pdbselect, msg='After alignment  ')
        outfn = (prefix or pdb.getTitle()) + suffix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        writePDB(outfn, pdb)
    else:
        pdbs = list(pdbs)
        reffn = pdbs.pop(0)
        seqid = _option('seqid', 90)
        overlap = _option('overlap', 90)
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = parsePDB(reffn)
        ref_sel = ref.select(selstr)
        if ref_sel:
            LOGGER.info('Selection {0} matched {1} atoms.'.format(
                repr(selstr), len(ref_sel)))
        else:
            raise ValueError('selection {0} did not match any atoms'.format(
                repr(selstr)))
        # Chain matching is only attempted when the reference selection
        # holds at least two 'ca' atoms.
        match = True
        if ref_sel.numAtoms('ca') < 2:
            match = False

        for arg in pdbs:
            if arg == reffn:
                continue
            #if '_aligned.pdb' in arg:
            #    continue
            LOGGER.info('Evaluating structure: ' + arg)
            pdb = parsePDB(arg)
            if match:
                result = matchAlign(pdb, ref, seqid=seqid, overlap=overlap,
                                    tarsel=selstr, allcsets=True,
                                    cslabel='Model', csincr=1)
                if result:
                    outfn = pdb.getTitle() + suffix + '.pdb'
                    LOGGER.info('Writing file: ' + outfn)
                    writePDB(outfn, pdb)
                    continue
            # Fallback: superpose directly when the selections line up
            # atom-for-atom.
            pdb_sel = pdb.select(selstr)
            if pdb_sel is None:
                # ``select`` yields None when nothing matches; report the
                # structure as failed instead of crashing on ``len(None)``.
                LOGGER.warn('Failed to align structure ' + arg + '.')
                continue
            LOGGER.info('Selection {0} matched {1} atoms.'.format(
                repr(selstr), len(pdb_sel)))
            if (len(pdb_sel) == len(ref_sel) and
                    all(pdb_sel.getNames() == ref_sel.getNames())):
                printRMSD(ref_sel, pdb_sel, msg='Before alignment ')
                superpose(pdb_sel, ref_sel)
                printRMSD(ref_sel, pdb_sel, msg='After alignment  ')
                outfn = pdb.getTitle() + suffix + '.pdb'
                LOGGER.info('Writing file: ' + outfn)
                writePDB(outfn, pdb)
            else:
                LOGGER.warn('Failed to align structure ' + arg + '.')
def score_interaction_and_dump(parsed, ifgresn, vdmresn, ifg_contact_atoms,
                               vdm_contact_atoms, method, targetresi, cutoff,
                               pdbix, pdbname):
    """Score one iFG/vdM contact of *parsed* against stored lookup coordinates.

    The contact atoms reported by ``get_ifg_vdm`` are selected from *parsed*
    to build a query coordinate array.  Every lookup entry whose index is in
    the frame returned by ``filter_contact`` is superposed onto the query for
    each atom ordering produced by ``flip``, and the minimum RMSD per entry
    is recorded.  For at most 30 entries whose first ordering is the best one
    and lies below *cutoff*, the matching residues are read from a database
    PDB file and written under ``./output_data/pdbfiles/``.  Neighbour counts
    from ``get_NN`` and greedy-clustering statistics are printed.

    :arg parsed: structure supporting ``select(...).getCoords()``
    :arg ifgresn, vdmresn: residue names; replaced by
        ``constants.AAname_rev`` look-ups before use
    :arg ifg_contact_atoms, vdm_contact_atoms, method: forwarded unchanged
        to ``get_ifg_vdm``
    :arg targetresi, pdbix, pdbname: only used to build output file names
    :arg cutoff: converted with ``float``; RMSD threshold for writing a PDB
        file, also forwarded to ``get_NN``
    :returns: ``(ifg chain, ifg resnum, ifgresn, vdm chain, vdm resnum,
        vdmresn, ifgatoms, vdmatoms, num_nn, norm_metrics)``

    NOTE(review): this block arrived with its line structure collapsed; the
    nesting below (in particular which statements sit under the first ``if``
    and which ``if`` the ``else`` pairs with) is a reconstruction -- confirm
    against the original file.
    """
    cutoff = float(cutoff)
    ifgtype, vdmtype, ifginfo, vdminfo = get_ifg_vdm(parsed, ifgresn, vdmresn,
                                                     ifg_contact_atoms,
                                                     vdm_contact_atoms, method)
    # Atom lists ['N', 'CA', 'C'] and ['CA', 'C', 'O'] are excluded here.
    if ifgtype[1] != ['N', 'CA', 'C'] and ifgtype[1] != ['CA', 'C', 'O']:
        ifgresn = constants.AAname_rev[ifgtype[0]]
        vdmresn = constants.AAname_rev[vdmtype[0]]
        ifgatoms = ifgtype[1]
        vdmatoms = vdmtype[1]
        # filter for only vdmresn vdms of ifgresn with ifgatoms
        # and vdmatoms directly involved in interactions
        num_all_vdms, lookupdf = filter_contact(ifgresn, vdmresn, ifgatoms,
                                                vdmatoms)
        # Query coordinates: one row per iFG atom, then one per vdM atom.
        query = []
        for atom in ifgatoms:
            query.append(
                parsed.select('chain {} and resnum {} and name {}'.format(
                    ifginfo[0], ifginfo[1], atom)).getCoords()[0])
        for atom in vdmatoms:
            query.append(
                parsed.select('chain {} and resnum {} and name {}'.format(
                    vdminfo[0], vdminfo[1], atom)).getCoords()[0])
        query = np.array(query)
        # NOTE(review): hard-coded absolute path, and the file object passed
        # to pkl.load is never closed explicitly.
        lookupcoords = pkl.load(
            open(
                '/home/gpu/Sophia/combs/st_wd/Lookups/refinedvdms/coords_of_{}.pkl'
                .format(ifgtype[0]), 'rb'))
        #lookupcoords = lookupcoords[:50] # delete
        ifglists = flip(ifgatoms, ifgresn)
        vdmlists = flip(vdmatoms, vdmresn)
        rmsds = []
        num_atoms = len(query)
        # Keep only lookup entries whose first element is an index of
        # lookupdf.
        coords_ls = [
            item for item in lookupcoords if item[0] in lookupdf.index
        ]
        lookupatoms_to_clus = []
        counter = 0  # to keep count of how many pdbs are being output
        for item in coords_ls:
            if len(item) == 3:
                compare_rmsds = []
                ifg_vdm_ind = []
                # Try every combination of iFG and vdM atom orderings.
                for ifg_ind, ifgls in enumerate(ifglists):
                    for vdm_ind, vdmls in enumerate(vdmlists):
                        lookupatoms = get_order_of_atoms(
                            item, ifgresn, vdmresn, ifgls, vdmls)
                        moved, transf = pr.superpose(lookupatoms, query)
                        temp_rmsd = pr.calcRMSD(moved, query)
                        compare_rmsds.append(temp_rmsd)
                        ifg_vdm_ind.append([moved, temp_rmsd])
                # item[0] is df index
                rmsds.append([item[0], min(compare_rmsds)])
                # get index of which one had min rmsd
                for which_ind, each in enumerate(ifg_vdm_ind):
                    if each[1] == min(compare_rmsds):
                        lookupatoms_to_clus.append(each[0])
                        ########################################################################
                        # output pdb if low rmsd
                        ########################################################################
                        if each[1] < cutoff and counter < 30 and which_ind == 0:
                            # this is to ensure rmsd is below cutoff when not flipped
                            # bc don't want to take care of that in prody to output pdb
                            row = lookupdf.loc[item[0]]
                            # Try the first database directory, fall back to
                            # the second.
                            # NOTE(review): bare ``except`` also catches
                            # KeyboardInterrupt/SystemExit.
                            try:
                                db_dir = '/home/gpu/Sophia/STcombs/20171118/database/reduce/'
                                par = pr.parsePDB(db_dir + row['pdb'] + 'H.pdb')
                            except:
                                db_dir = '/home/gpu/Sophia/combs/st_wd/20180207_db_molprobity_biolassem/'
                                par = pr.parsePDB(db_dir + row['pdb'] + 'H.pdb')
                            ifgchid, ifgresnum = row['chid_ifg'], row[
                                'resnum_ifg']
                            vdmchid, vdmresnum = row['chid_vdm'], row[
                                'resnum_vdm']
                            # Work on a copy reduced to the two residues;
                            # relabel them chain Y (iFG) / X (vdM), resnum 10.
                            printout = copy.deepcopy(par)
                            printout = printout.select(
                                '(chain {} and resnum {}) or (chain {} and resnum {})'
                                .format(ifgchid, ifgresnum, vdmchid, vdmresnum))
                            printout.select('chain {} and resnum {}'.format(
                                ifgchid, ifgresnum)).setChids('Y')
                            printout.select('chain {} and resnum {}'.format(
                                vdmchid, vdmresnum)).setChids('X')
                            printout.select('all').setResnums(10)
                            printout_interactamer = []
                            integrin_interactamer = []
                            try:  # skip the ones that have segment ids. will prob need to update this
                                # for the newly combed stuff
                                for atom in ifgatoms:
                                    integrin_interactamer.append(
                                        parsed.select(
                                            'chain {} and resnum {} and name {}'
                                            .format(ifginfo[0], ifginfo[1], atom)))
                                    printout_interactamer.append(
                                        printout.select(
                                            'chain Y and resnum 10 and name {}'
                                            .format(atom)))
                                for atom in vdmatoms:
                                    integrin_interactamer.append(
                                        parsed.select(
                                            'chain {} and resnum {} and name {}'
                                            .format(vdminfo[0], vdminfo[1], atom)))
                                    printout_interactamer.append(
                                        printout.select(
                                            'chain X and resnum 10 and name {}'
                                            .format(atom)))
                                integrin_interactamer_prody = []
                                # Merge the per-atom selections with ``+``.
                                integrin_interactamer = sum(
                                    integrin_interactamer[1:],
                                    integrin_interactamer[0])
                                printout_interactamer = sum(
                                    printout_interactamer[1:],
                                    printout_interactamer[0])
                                try:
                                    assert len(integrin_interactamer) == len(
                                        printout_interactamer)
                                    interact_res = printout.select(
                                        '(chain X and resnum 10) or (chain Y and resnum 10)'
                                    )
                                    # NOTE(review): ``transf`` is whatever the
                                    # LAST superpose in the ordering loops
                                    # returned, while this branch requires the
                                    # FIRST ordering (which_ind == 0) -- verify
                                    # this is intended when more than one
                                    # ordering exists.
                                    interactamer_transf = pr.applyTransformation(
                                        transf, printout_interactamer)
                                    outdir = './output_data/pdbfiles/'
                                    threecode = constants.AAname[ifgresn]
                                    pr.writePDB(
                                        outdir + '{}_{}_{}_{}{}_{}{}_{}_{}'.format(
                                            pdbix, pdbname, targetresi,
                                            ifginfo[1], ifgresn, vdminfo[1],
                                            vdmresn, cutoff, row.name),
                                        interactamer_transf)
                                    counter += 1
                                # NOTE(review): any failure while writing is
                                # silently discarded here.
                                except:
                                    pass
                            except:
                                traceback.print_exc()
                                pass
            else:
                # Entries that do not have three elements get a sentinel RMSD.
                rmsds.append([int(item[0]), 100000])
        # count how many NNs the query intrxn has
        num_nn, norm_metrics = get_NN(lookupatoms_to_clus, num_atoms, rmsds,
                                      query, cutoff, num_all_vdms)
        print('num NN')
        print(num_nn)
        exp_list = norm_metrics[-1]
        print('======= FOR NEAREST NEIGHBORS ==========')
        print('avg with single')
        print(exp_list[0])
        print('avg without single')
        print(exp_list[1])
        print('median with single')
        print(exp_list[2])
        print('median without single')
        print(exp_list[3])
        # do greedy clustering
        D = make_pairwise_rmsd_mat(
            np.array(lookupatoms_to_clus).astype('float32'))
        D = make_square(D)
        adj_mat = make_adj_mat(D, 0.5)
        mems, centroids = greedy(adj_mat)
        print('======= FOR GREEDY CLUS ==========')
        print('avg with singletons')
        print(np.mean([len(x) for x in mems]))
        print('avg without singletons')
        print(np.mean([len(x) for x in mems if len(x) > 1]))
        print('median with singletons')
        print(np.median([len(x) for x in mems]))
        print('median without singletons')
        print(np.median([len(x) for x in mems if len(x) > 1]))
        return ifginfo[0], ifginfo[1], ifgresn, vdminfo[0], vdminfo[1],\
            vdmresn, ifgatoms, vdmatoms, num_nn, norm_metrics
'ILE': 8, 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7 } # esto simplemente toma que tipos de aminoĆ”cidos presenta esa proteĆna types = set(p.getResnames()) for folder_amino in types: # Me fijo que cantidad de archivos .pdb hay para saber cuanto tengo que iterar p1 = prody.parsePDB(".\\" + folder_pdb + "\\" + folder_amino + "\\amino" + str(0) + ".pdb") cant_arch = len(os.listdir('.\\' + folder_pdb + '\\' + folder_amino + '\\')) for num in range(cant_arch): file = ".\\" + folder_pdb + "\\" + folder_amino + "\\amino" + str( num) + ".pdb" p2 = prody.parsePDB(file) p3, t = prody.superpose(p2, p1) file3 = ".\\" + folder_pdb + "\\" + folder_amino + "\\amino" + str( num) + "_aligned.pdb" prody.writePDB(file3, p3)
def align():
    """Prepare, dock and score every target found under ``wd/challengedata``.

    Uses the module-level ``wd`` as the working root.  For each week
    directory and each target directory inside it, the routine shells out
    (``os.system``) to ``grep`` to build ``lmcss_rec.pdb``, to
    ``rdkit-scripts/rdconf.py`` to turn ``.smi`` files into ``lig.sdf``, to
    ``smina`` to produce ``lmcss_docked.sdf`` and to ``obrms``; it appends a
    line to ``visual.txt`` and copies the docked file into
    ``answers/<week>/<target>/``.  Targets that already have
    ``lmcss_docked.sdf`` under ``answers`` are skipped.

    The function changes the process working directory repeatedly and
    returns nothing.

    NOTE(review): this block arrived with its line structure collapsed; the
    nesting below and the extent of the ``##`` comments are a
    reconstruction -- confirm against the original file.
    """
    global wd
    ans = wd + '/challengedata/answers'
    if os.path.isdir(
            ans) == False:  #if the answers directory isnt formed make it
        os.mkdir(wd + '/challengedata/answers')
    rddir = wd + '/challengedata/rdkit-scripts'
    if os.path.isdir(rddir) == False:
        # NOTE(review): the clone lands in the current working directory,
        # which is not necessarily ``rddir``.
        a = 'git clone https://github.com/dkoes/rdkit-scripts'
        os.system(a)
    data = os.listdir(wd + '/challengedata')
    for x in (data):  #for each weeks data
        if x == "readme.txt" or x == "latest.txt" or x == "answers" or x == "rdkit-scripts" or x == 'PDBfiles' or x == 'visual.txt':
            pass
        else:
            toDir = wd + '/challengedata/answers/' + x
            if os.path.isdir(
                    toDir
            ) == False:  #if the path to answers dir doesnt exist
                os.mkdir(toDir)  #make directory
            dock = os.listdir(wd + '/challengedata/' + x)
            for y in (dock):
                # Path of the docked result that marks this target as done.
                a = str(os.getcwd() + '/answers/' + x + '/' + y +
                        '/lmcss_docked.sdf')
                if y == 'readme.txt' or y == 'new_release_structure_sequence_canonical.tsv' or y == 'new_release_structure_nonpolymer.tsv' or y == 'new_release_crystallization_pH.tsv' or y == 'new_release_structure_sequence.tsv':
                    pass
                elif (os.path.isfile(a) == True):
                    pass
                else:
                    # NOTE(review): ``input`` and ``id`` shadow builtins.
                    input = os.listdir(wd + '/challengedata/' + x + '/' + y)
                    for z in (input):
                        if z.startswith("LMCSS") and z.endswith(".pdb"):
                            if (z.endswith("lig.pdb")):
                                pass
                            else:
                                # NOTE(review): ``str.strip`` removes any of
                                # the characters '.', 'p', 'd', 'b' from both
                                # ends, not the literal suffix '.pdb'.
                                id = z.strip('.pdb')
                                sts = str("grep ATOM " + z + " > lmcss_rec.pdb"
                                          )  #creates receptor .pdb file
                                cd = wd + '/challengedata'
                                os.chdir(
                                    cd + '/' + x + '/' +
                                    y)  #change directory to week/ligand
                                os.system(
                                    sts
                                )  #runs and creates receptor .pbd file
                                os.chdir(cd)  #back to challenge directory
                    # NOTE(review): ``cd`` and ``id`` are only assigned in the
                    # loop above when a matching LMCSS file exists.
                    input = os.listdir(
                        cd + '/' + x + '/' + y
                    )  #lists files inside ligand in certain week
                    for z in (input):
                        if z.endswith(
                                ".smi"
                        ):  # changes .smi -> lig.sdf
                            cd = str(os.getcwd())
                            sts = str(" " + cd + '/' + x + '/' + y + '/' + z +
                                      " lig.sdf --maxconfs 1")
                            os.chdir(cd + '/' + x + '/' + y)
                            os.system(
                                cd + '/rdkit-scripts/rdconf.py' + sts)
                            os.chdir(cd)
                    for z in (input):  # runs smina
                        if z.endswith("lig.pdb"):
                            sts = str(
                                "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand "
                                + z + " -o " + id + "_docked.sdf")
                            cd = str(
                                os.getcwd())  #lignad directory
                            os.chdir(cd + '/' + x + '/' + y)
                            #os.system(sts)
                            sts = str(
                                "smina -r lmcss_rec.pdb -l lig.sdf --autobox_ligand "
                                + z + " -o lmcss_docked.sdf")
                            # NOTE(review): the working directory was already
                            # changed just above, so this second
                            # getcwd()/chdir pair builds a doubly nested
                            # path -- verify.
                            cd = str(
                                os.getcwd())  #lignad directory
                            os.chdir(cd + '/' + x + '/' + y)
                            os.system(sts)
                            os.chdir(cd)
                    cur = str(os.getcwd() + '/answers/' + x + '/' + y)
                    if (os.path.isdir(cur) == True):
                        os.chdir(cd + '/' + x + '/' + y)
                        os.getcwd()  ##
                        input = os.listdir(cd + '/' + x + '/' + y)
                        for i in (input):
                            if i.endswith(
                                    ".txt"
                            ) and i != "center.txt" and i != "visual.txt":
                                # NOTE(review): this file handle is never
                                # closed.
                                f = open(i)
                                lines = f.readlines()
                                ligand = lines[2].strip(
                                    'ligand, ')
                                ligand = ligand.replace(
                                    '\n', '')
                                ligand = str(ligand)  #gets the ligand from txt file
                            if i.endswith("lig.pdb"):  #see if pdb exists
                                prody.fetchPDB(y)
                                proteinPDB = prody.parsePDB(y)
                                ourPDB = prody.parsePDB(
                                    'lmcss_rec.pdb')
                                a, b, seqid, overlap = prody.matchChains(
                                    proteinPDB, ourPDB)[0]
                                b, protein_sp = prody.superpose(
                                    b, a, weights=None)
                                # NOTE(review): a file name is passed as a
                                # selection string and the result is
                                # discarded.
                                b.select(ligand + '_ligand.pdb')
                                sts = str("obrms -f " + i + ' ' + id +
                                          "_docked.sdf")  #run obrms
                                # parse results and output to the visualization txt file
                                os.system(sts)
                                # NOTE(review): the file is opened in binary
                                # mode but a ``str`` is written, and
                                # ``f.close`` lacks parentheses so it is
                                # never called.
                                f = open('visual.txt', 'ab+')
                                f.write(x + ' smina ' + y + '\n')
                                f.close
                                curdir = str(cd + '/' + x + '/' + y + '/' +
                                             id + '_docked.sdf')
                        print(input)  ##
                        for i in (input):
                            if i.endswith("lig.pdb"):  #see if pdb exists
                                protein = prody.fetchPDB(y)  #NEED NUMPY ARRAY
                                # NOTE(review): ``array`` is not defined in
                                # this function, and ``superpose`` is given a
                                # file name instead of coordinates.
                                prody.writeArray(
                                    'lmcss_docked_array.sdf', array)
                                prody.superpose(
                                    'lmcss_docked.sdf', protein, weights=None)
                                sts = str("obrms -f " + i +
                                          " lmcss_docked.sdf")  #run obrms
                                # parse results and output to the visualization txt file
                                os.system(sts)
                                os.chdir(wd + '/challengedata/')
                                f = open('visual.txt', 'ab+')
                                f.write(x + ' smina ' + y + '\n')
                                f.close
                                curdir = str(
                                    cd + '/' + x + '/' + y + '/lmcss_docked.sdf')
                                todir = str(cd + '/answers/' + x + '/' + y +
                                            '/')
                                shutil.copy(curdir, todir)
                                print(curdir)
                                # Only the first ligand PDB is processed.
                                break
                        os.chdir(wd)
                    else:
                        # Same processing as the branch above, after first
                        # creating the answers sub-directory.
                        os.mkdir(cur)
                        os.chdir(cd + '/' + x + '/' + y)
                        input = os.listdir(cd + '/' + x + '/' + y)
                        for i in (input):
                            if i.endswith(
                                    ".txt"
                            ) and i != "center.txt" and i != "visual.txt":
                                f = open(i)
                                lines = f.readlines()
                                ligand = lines[2].strip(
                                    "ligand, ")
                                ligand = ligand.replace(
                                    '\n', '')
                                ligand = str(ligand)  #gets ligand from txt file
                            if i.endswith("lig.pdb"):
                                prody.fetchPDB(y)
                                proteinPDB = prody.parsePDB(y)
                                ourPDB = prody.parsePDB(
                                    'lmcss_rec.pdb')
                                prody.matchChains(
                                    proteinPDB, ourPDB)
                                # NOTE(review): the result of ``superpose``
                                # is used here as if it were an atom group --
                                # verify.
                                protein_sp = prody.superpose(
                                    ourPDB, proteinPDB, weights=None)
                                protein_sp.select(
                                    ligand + '_ligand.pdb')
                                sts = str("obrms -f " + i + ' ' + id +
                                          "_docked.sdf")
                                os.system(sts)
                                f = open('visual.txt', 'ab+')
                                f.write(x + ' smina ' + y + '\n')
                                f.close
                                curdir = str(cd + '/' + x + '/' + y + '/' +
                                             id + '_docked.sdf')
                            if i.endswith("lig.pdb"):
                                protein = prody.fetchPDB(y)
                                prody.writeArray(
                                    'lmcss_docked_array.sdf', array)
                                prody.superpose(
                                    'lmcss_docked.sdf', protein, weights=None)
                                sts = str("obrms -f " + i +
                                          " lmcss_docked.sdf")
                                os.system(sts)
                                os.chdir(wd + '/challengedata/')
                                f = open('visual.txt', 'ab+')
                                f.write(x + ' smina ' + y + '\n')
                                f.close
                                curdir = str(
                                    cd + '/' + x + '/' + y + '/lmcss_docked.sdf')
                                todir = str(cd + '/answers/' + x + '/' + y +
                                            '/')
                                shutil.copy(curdir, todir)
                                print(curdir)
                                break
                        os.chdir(wd)
def prody_align(*pdbs, **kwargs):
    """Align models in a PDB file or multiple structures in separate PDB files.
    By default, protein chains will be matched based on selected atoms and
    alignment will be performed based on matching residues.  If non-protein
    atoms are selected and selected atoms match in multiple structures,
    they will be used for alignment.

    When a single PDB is given, its coordinate sets are aligned onto the
    reference model and the result is written to one file.  When several are
    given, the first one is the reference and every other structure is
    aligned onto it and written to its own file.

    :arg pdbs: PDB identifier(s) or filename(s)

    :arg select: atom selection string, default is :term:`calpha`,
        see :ref:`selections`

    :arg model: for NMR files, reference model index, default is ``1``

    :arg seqid: percent sequence identity, default is ``90``

    :arg overlap: percent sequence overlap, default is ``90``

    :arg prefix: prefix for output file, default is PDB filename

    :arg suffix: output filename suffix, default is :file:`_aligned`

    :raises ValueError: if the selection matches no atoms in the single
        input file (and no *subparser* was supplied) or in the reference
        structure"""

    from numpy import all
    from prody import LOGGER, writePDB, parsePDB
    from prody import alignCoordsets, printRMSD, matchAlign, superpose

    def _option(name, default):
        # A missing key and an explicit ``None`` are treated alike so the
        # defaults documented above apply when this is called as a plain
        # function, not only through the command-line parser.
        value = kwargs.get(name)
        return default if value is None else value

    selstr = _option('select', 'calpha')
    suffix = _option('suffix', '_aligned')
    if len(pdbs) == 1:
        pdb = pdbs[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        prefix = kwargs.get('prefix')
        model = _option('model', 1)
        pdb = parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            subparser = kwargs.get('subparser')
            if subparser:
                subparser.error('Selection {0} do not match any atoms.'
                                .format(repr(selstr)))
            else:
                raise ValueError('select does not match any atoms')
        LOGGER.info('{0} atoms will be used for alignment.'
                    .format(len(pdbselect)))
        # Model numbers are 1-based, coordinate set indices are 0-based.
        pdbselect.setACSIndex(model - 1)
        printRMSD(pdbselect, msg='Before alignment ')
        alignCoordsets(pdbselect)
        printRMSD(pdbselect, msg='After alignment  ')
        outfn = (prefix or pdb.getTitle()) + suffix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        writePDB(outfn, pdb)
    else:
        pdbs = list(pdbs)
        reffn = pdbs.pop(0)
        seqid = _option('seqid', 90)
        overlap = _option('overlap', 90)
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = parsePDB(reffn)
        ref_sel = ref.select(selstr)
        if ref_sel:
            LOGGER.info('Selection {0} matched {1} atoms.'
                        .format(repr(selstr), len(ref_sel)))
        else:
            raise ValueError('selection {0} did not match any atoms'
                             .format(repr(selstr)))
        # Chain matching is only attempted when the reference selection
        # holds at least two 'ca' atoms.
        match = True
        if ref_sel.numAtoms('ca') < 2:
            match = False

        for arg in pdbs:
            if arg == reffn:
                continue
            #if '_aligned.pdb' in arg:
            #    continue
            LOGGER.info('Evaluating structure: ' + arg)
            pdb = parsePDB(arg)
            if match:
                result = matchAlign(pdb, ref, seqid=seqid, overlap=overlap,
                                    tarsel=selstr, allcsets=True,
                                    cslabel='Model', csincr=1)
                if result:
                    outfn = pdb.getTitle() + suffix + '.pdb'
                    LOGGER.info('Writing file: ' + outfn)
                    writePDB(outfn, pdb)
                    continue
            # Fallback: superpose directly when the selections line up
            # atom-for-atom.
            pdb_sel = pdb.select(selstr)
            if pdb_sel is None:
                # ``select`` yields None when nothing matches; report the
                # structure as failed instead of crashing on ``len(None)``.
                LOGGER.warn('Failed to align structure ' + arg + '.')
                continue
            LOGGER.info('Selection {0} matched {1} atoms.'
                        .format(repr(selstr), len(pdb_sel)))
            if (len(pdb_sel) == len(ref_sel) and
                    all(pdb_sel.getNames() == ref_sel.getNames())):
                printRMSD(ref_sel, pdb_sel, msg='Before alignment ')
                superpose(pdb_sel, ref_sel)
                printRMSD(ref_sel, pdb_sel, msg='After alignment  ')
                outfn = pdb.getTitle() + suffix + '.pdb'
                LOGGER.info('Writing file: ' + outfn)
                writePDB(outfn, pdb)
            else:
                LOGGER.warn('Failed to align structure ' + arg + '.')