def na():
    """Fetch the native structure named by the first object and fit the design to it.

    The first name returned by ``cmd.get_names()`` is taken as the design
    object; its characters 0-3 are used as the PDB id and character 4 as the
    chain id.  The current view is stored under the key 'v' beforehand and
    recalled afterwards.
    """
    cmd.view('v', 'store')

    design = cmd.get_names()[0]
    pdbid, chainid = design[0:4], design[4:5]
    cmd.fetch(pdbid)

    selections = (
        ("design_na", "%s and chain %s" % (design, "B")),
        ("native_na", "%s and chain %s" % (pdbid, chainid)),
        ("other_na", "%s and not chain %s" % (pdbid, chainid)),
    )
    for sele_name, sele_expr in selections:
        cmd.select(sele_name, sele_expr)

    # NOTE(review): this hides "other" although the selection created above
    # is called "other_na" -- confirm which name is intended.
    cmd.hide("everything", "other")
    cmd.hide('(resn HOH)')

    cmd.super("design_na", "native_na")
    cmd.select("none")
    cmd.orient(design)

    # Remove the file that cmd.fetch downloaded into the working directory.
    cmd.system("rm %s.pdb" % (pdbid))
    cmd.view('v', 'recall')
def na():
    """Superimpose chain B of the first loaded object onto its native chain.

    The native PDB id (characters 0-3) and chain id (character 4) are sliced
    out of the first object name.  The view is saved as 'v' on entry and
    restored on exit.
    """
    def chain_of(model, chain, negate=False):
        # Build the chain-restricted selection expression for *model*.
        template = "%s and not chain %s" if negate else "%s and chain %s"
        return template % (model, chain)

    cmd.view('v', 'store')
    loaded = cmd.get_names()[0]
    pdbid = loaded[0:4]
    chainid = loaded[4:5]
    cmd.fetch(pdbid)

    cmd.select("design_na", chain_of(loaded, "B"))
    cmd.select("native_na", chain_of(pdbid, chainid))
    cmd.select("other_na", chain_of(pdbid, chainid, negate=True))

    # NOTE(review): "other" is hidden here, but the selection defined above
    # is "other_na" -- confirm the intended selection name.
    cmd.hide("everything", "other")
    cmd.hide('(resn HOH)')
    cmd.super("design_na", "native_na")
    cmd.select("none")
    cmd.orient(loaded)
    # Delete the downloaded structure file from the working directory.
    cmd.system("rm %s.pdb" % (pdbid))
    cmd.view('v', 'recall')
def na():
    """Fit the fetched native chain onto chain B of the first loaded object.

    The first object name encodes the PDB id and chain id; when the name
    starts with 'd' that leading character is skipped before slicing.
    Hydrogens are excluded from the fitted selections, the native chain is
    shown as sticks and cartoon, and the view stored as 'v' is recalled at
    the end.
    """
    cmd.view('v', 'store')

    design = cmd.get_names()[0]
    start = 1 if design[0] == 'd' else 0
    pdbid = design[start:start + 4]
    chainid = design[start + 4:start + 5]
    cmd.fetch(pdbid)

    heavy_chain = "%s and chain %s and not hydro"
    cmd.select("design_na", heavy_chain % (design, "B"))
    cmd.select("native_na", heavy_chain % (pdbid, chainid))
    cmd.select("other_na", "%s and not chain %s" % (pdbid, chainid))

    # NOTE(review): "other" is hidden, but the selection created above is
    # "other_na" -- confirm the intended name.
    cmd.hide("everything", "other")
    cmd.hide('(resn HOH)')

    # Here the native is the mobile structure and the design is the target.
    cmd.super("native_na", "design_na")

    cmd.hide("lines", "all")
    for representation in ("sticks", "cartoon"):
        cmd.show(representation, "native_na")
    cmd.select("none")
    cmd.orient(design)

    # The file name passed to rm uses the lower-cased id.
    pdbid = pdbid.lower()
    cmd.system("rm %s.pdb" % (pdbid))
    cmd.view('v', 'recall')
def na():
    """Overlay the native chain on the design (chain B of the first object).

    Object names are read as ``[d]<4-char pdb id><chain id>``: a leading 'd'
    is skipped when present.  The stored view 'v' is restored before
    returning.
    """
    cmd.view('v', 'store')
    name = cmd.get_names()[0]

    # Drop the optional 'd' prefix, then split into PDB id and chain id.
    code = name[1:6] if name[0] == 'd' else name[0:5]
    pdbid, chainid = code[0:4], code[4:5]

    cmd.fetch(pdbid)
    cmd.select("design_na", "%s and chain %s and not hydro" % (name, "B"))
    cmd.select("native_na", "%s and chain %s and not hydro" % (pdbid, chainid))
    cmd.select("other_na", "%s and not chain %s" % (pdbid, chainid))

    # NOTE(review): hides "other" while the selection above is "other_na";
    # confirm the intended selection name.
    cmd.hide("everything", "other")
    cmd.hide('(resn HOH)')

    # The native chain is moved onto the design.
    cmd.super("native_na", "design_na")

    cmd.hide("lines", "all")
    cmd.show("sticks", "native_na")
    cmd.show("cartoon", "native_na")
    cmd.select("none")
    cmd.orient(name)

    # rm is given the lower-cased PDB id.
    pdbid = pdbid.lower()
    cmd.system("rm %s.pdb" % (pdbid))
    cmd.view('v', 'recall')
def nat(pdbid, chainid):
    """Fetch *pdbid* and fit chain B of the first loaded object onto its chain *chainid*.

    Arguments:
    pdbid   -- PDB id passed to cmd.fetch
    chainid -- chain of the fetched structure used as the superposition target

    The view is stored as 'v' on entry and recalled on exit.
    """
    cmd.view('v', 'store')
    design = cmd.get_names()[0]
    cmd.fetch(pdbid)

    heavy_chain = "%s and chain %s and not hydro"
    cmd.select("design_nat", heavy_chain % (design, "B"))
    cmd.select("native_nat", heavy_chain % (pdbid, chainid))
    cmd.select("other_nat", "%s and not chain %s" % (pdbid, chainid))

    # NOTE(review): "other" is hidden, but the selection created above is
    # "other_nat" -- confirm the intended name.
    cmd.hide("everything", "other")
    cmd.hide('(resn HOH)')

    cmd.super("design_nat", "native_nat")
    cmd.select("none")
    cmd.orient(design)

    cmd.hide("lines", "all")
    for representation in ("sticks", "cartoon"):
        cmd.show(representation, "native_nat")

    # Remove the file downloaded by cmd.fetch.
    cmd.system("rm %s.pdb" % (pdbid))
    cmd.view('v', 'recall')
def nat(pdbid, chainid):
    """Superimpose the design (chain B of the first object) on a given native chain.

    Arguments:
    pdbid   -- PDB id to fetch
    chainid -- chain id of the fetched structure to fit against

    The current view is saved under 'v' first and restored at the end.
    """
    def heavy(model, chain):
        # Selection expression for the non-hydrogen atoms of one chain.
        return "%s and chain %s and not hydro" % (model, chain)

    cmd.view('v', 'store')
    loaded = cmd.get_names()[0]
    cmd.fetch(pdbid)

    cmd.select("design_nat", heavy(loaded, "B"))
    cmd.select("native_nat", heavy(pdbid, chainid))
    cmd.select("other_nat", "%s and not chain %s" % (pdbid, chainid))

    # NOTE(review): hides "other" although the selection defined above is
    # "other_nat" -- confirm which is meant.
    cmd.hide("everything", "other")
    cmd.hide('(resn HOH)')
    cmd.super("design_nat", "native_nat")
    cmd.select("none")
    cmd.orient(loaded)
    cmd.hide("lines", "all")
    cmd.show("sticks", "native_nat")
    cmd.show("cartoon", "native_nat")
    # Delete the fetched file from the working directory.
    cmd.system("rm %s.pdb" % (pdbid))
    cmd.view('v', 'recall')
def draw_axis(chA, chB, scale_factor=20, w=0.6, r1=1, g1=1, b1=1, r2=1, g2=0, b2=0):
    """Draw the rotation axis relating *chA* to *chB* as a CGO cylinder.

    Arguments:
    chA, chB     -- selections passed to the alignment helpers
    scale_factor -- the cylinder extends 3 * scale_factor along the axis on
                    each side of the point nearest to the axis
    w            -- cylinder radius
    r1, g1, b1   -- colour of the first cylinder end
    r2, g2, b2   -- colour of the second cylinder end

    The CGO is loaded under a name equal to the rotation angle in degrees.
    Pseudoatoms '_1'..'_4' are created to display an angle, or a dihedral
    plus a distance, and the result is reported through print_information.
    On return an object called 'working' (a copy of chA superimposed on chB)
    is left in the session.
    """
    # transf_matrix also refreshes the module-level cmW read further down.
    T = transf_matrix(chA, chB)
    angle_degrees = (angle_axis(chA, chB) * 180) / math.pi
    axis1 = [direction_cosines(chA, chB)[k] for k in range(3)]
    p = nearest_point_to_axis(chA, chB)

    # End points of the cylinder, on either side of p along the axis.
    reach = 3 * scale_factor
    x1, y1, z1 = [p[k] + (reach * axis1[k]) for k in range(3)]
    x2, y2, z2 = [p[k] - (reach * axis1[k]) for k in range(3)]
    obj = [cgo.CYLINDER, x1, y1, z1, x2, y2, z2, w, r1, g1, b1, r2, g2, b2, 0.0]
    cmd.load_cgo(obj, angle_degrees)

    cmA = center_of_Mass(chA)
    cmB = cmW
    cmAver = (cmB + cmA) / 2

    # Vector between the two centres of mass, its length and unit direction.
    vector = numpy.array([(cmB[k] - cmA[k]) for k in range(3)])
    moduli_vector = numpy.linalg.norm(vector)
    vector_director = numpy.array([(cmB[k] - cmA[k]) / moduli_vector for k in range(3)])

    pC_A = proyeccion_centroide(chA, chA, chB)
    pC_B = proyeccion_centroide_working(chA, chB)
    trans_vector = numpy.array([(pC_B[k] - pC_A[k]) for k in range(3)])
    modu_tr = numpy.linalg.norm(trans_vector)

    rota_centroid_rad = numpy.dot(vector_director, axis1)
    rota_centroid = (rota_centroid_rad * 180) / math.pi
    rota_centroid_absol = round(numpy.absolute(rota_centroid), 2)

    p1, p2, p3 = '_1', '_2', '_3'
    if rota_centroid_absol == 0.00:
        # Three pseudoatoms: centre of A, its projection, centre of B.
        cmd.pseudoatom(pos=[cmA[0], cmA[1], cmA[2]], object=p1)
        cmd.pseudoatom(pos=[pC_A[0], pC_A[1], pC_A[2]], object=p2)
        cmd.pseudoatom(pos=[cmB[0], cmB[1], cmB[2]], object=p3)
        cmd.angle(None, p1, p2, p3)
        print_information(T, axis1, angle_degrees, moduli_vector, obj,
                          x1, y1, z1, x2, y2, z2, w, r1, g1, b1, r2, g2, b2)
    else:
        # Four pseudoatoms: both centres and both projections.
        p4 = '_4'
        cmd.pseudoatom(pos=[cmA[0], cmA[1], cmA[2]], object=p1)
        cmd.pseudoatom(pos=[pC_A[0], pC_A[1], pC_A[2]], object=p2)
        cmd.pseudoatom(pos=[pC_B[0], pC_B[1], pC_B[2]], object=p3)
        cmd.pseudoatom(pos=[cmB[0], cmB[1], cmB[2]], object=p4)
        cmd.dihedral(None, p1, p2, p3, p4)
        cmd.distance(None, p2, p3)
        print_information(T, axis1, angle_degrees, moduli_vector, obj,
                          x1, y1, z1, x2, y2, z2, w, r1, g1, b1, r2, g2, b2, modu_tr)

    cmd.create('working', chA)
    cmd.super('working', chB)
def chain_align_save(*args, **kwargs):
    """Fetch several (model, chain) pairs, superimpose them and save 'alignment.cif'.

    Each positional argument is a string that ast.literal_eval turns into a
    (model, chain) pair.  Every model is fetched, reduced to a
    '<model>.<chain>' object by create_subchain_object, and the fetched
    object is deleted again.  All chain objects after the first are then
    superimposed onto the first one.  Keyword arguments are accepted but not
    used.
    """
    pairs = [ast.literal_eval(kvpair) for kvpair in args]

    for pair in pairs:
        fetched = str.lower(pair[0])
        cmd.fetch(fetched)
        create_subchain_object(pair[0], pair[1])
        cmd.delete(fetched)

    chain_objects = ['{}.{}'.format(model, chain) for (model, chain) in pairs]
    for mobile in chain_objects[1:]:
        # The first pair is the fixed reference for every superposition.
        cmd.super(mobile, chain_objects[0], reset=1, transform=1, quiet=0)

    cmd.reset()
    cmd.save('alignment.cif')
def drawAnnote(self, axis, x, y, annote):
    """Mark the picked data point and show the matching model in PyMOL.

    Arguments:
    axis   -- matplotlib axes on which the text label and marker are drawn
    x, y   -- coordinates of the picked data point
    annote -- annotation of the point; '<annote>.pdb' inside self.pdb_dir is
              loaded and superimposed on the structure in self.native

    Markers of the previously picked point are removed first, so only one
    point is annotated at a time.
    """
    # Remove the markers of the last pick.  .values() works on both
    # Python 2 and Python 3 dictionaries.
    for markers in self.drawnAnnotations.values():
        for m in markers:
            m.remove()
    self.axis.figure.canvas.draw()
    self.drawnAnnotations = {}

    # The dictionary was just emptied, so (x, y) can never already be in it:
    # the point is always drawn fresh (the former visibility-toggle branch
    # was unreachable and has been dropped).
    t = axis.text(x, y, "(%3.2f, %3.2f)" % (x, y), )
    m = axis.scatter([x], [y], marker='d', c='r', zorder=100)
    self.drawnAnnotations[(x, y)] = (t, m)
    self.axis.figure.canvas.draw()

    cmd.delete('all')

    # load pdb files
    pdb_ext = '.pdb'
    cmd.load(self.native)
    cmd.load(os.path.join(self.pdb_dir, annote + pdb_ext))

    # label
    cmd.pseudoatom('foo')
    cmd.hide('all')

    # color: native in green, picked model in blue
    native_name = os.path.splitext(os.path.basename(self.native))[0]
    cmd.color('green', native_name)
    cmd.set('label_color', 'green', native_name)
    cmd.color('blue', annote)
    cmd.set('label_color', 'blue', annote)
    cmd.set('label_color', 'blue', 'foo')

    # background
    cmd.bg_color("white")
    cmd.set("depth_cue", 0)
    cmd.set("ray_trace_fog", 0)

    # view
    cmd.show(representation='cartoon')

    # alignment: the picked model is moved onto the native structure
    cmd.super(annote, native_name)

    # zoom
    cmd.zoom()
def transf_matrix(chA, chB):
    '''
DESCRIPTION

    Superimpose a copy of chA onto chB and return the object matrix of that
    copy.

    cmd.super is used for the fit; cmd.align might be a little faster but is
    expected to cope less well with low-sequence-identity pairs.

    As a side effect the module-level variable cmW is set to the centre of
    mass of the superimposed copy.  The temporary object 'working' is
    deleted before returning.
    '''
    global cmW

    scratch = 'working'
    cmd.create(scratch, chA)
    cmd.super(scratch, chB)
    matrix = cmd.get_object_matrix(scratch)
    cmW = center_of_Mass(scratch)
    cmd.delete(scratch)
    return matrix
def testSuper(self):
    """Check RMSD and aligned-atom count of cmd.super on two test fragments."""
    for filename, name in (("1oky-frag.pdb", "m1"), ("1t46-frag.pdb", "m2")):
        cmd.load(self.datafile(filename), name)

    result = cmd.super("m1", "m2", object="aln")
    rmsd, n_aligned = result[0], result[1]

    self.assertAlmostEqual(rmsd, 0.9667, delta=1e-4)
    self.assertEqual(n_aligned, 172)
    # The alignment object holds the paired atoms of both molecules.
    self.assertEqual(n_aligned, cmd.count_atoms("aln") / 2)
def pdb_merge(pdb_id1, pdb_id2, file_out_name):
    '''
    Write a copy of '<pdb_id1>.pdb' whose C-alpha B-factors hold the values
    produced by rmsd_b, for B-factor putty visualisation.

    The two objects are superimposed with cmd.super, rmsd_b is run on the
    aligned atoms, and the resulting per-residue B-factors of the CA atoms
    are collected into the module-level dictionary ``alignment`` (keyed by
    residue identifier) before the file is rewritten.

    Arguments:
    pdb_id1 -- name of the first loaded structure; '<pdb_id1>.pdb' is read
               from the working directory
    pdb_id2 -- name of the second loaded structure
    file_out_name -- name of the output file (must be a .pdb file)

    Side effects: also writes 'alignment1.pdb', '<pdb_id1>_mod.pdb' and
    'rmsd_b_selection1.pdb' to the working directory.
    '''
    cmd.super(pdb_id1, pdb_id2, object='alignment1')
    cmd.save('alignment1.pdb')
    cmd.disable('alignment1')

    selection1 = pdb_id1 + ' & alignment1'
    selection2 = pdb_id2 + ' & alignment1'
    print('rmsd_b selection1 = {}'.format(selection1))
    print('rmsd_b selection2 = {}'.format(selection2))
    rmsd_b(selection1, selection2)

    # NOTE(review): `alignment` is not defined in this function; it is
    # presumably a module-level dict visible to cmd.iterate -- confirm.
    cmd.iterate(pdb_id1 + ' and name CA', "alignment[resi] = b")

    # Saved for debugging.
    cmd.save(pdb_id1 + '_mod.pdb', pdb_id1)
    cmd.save('rmsd_b_selection1.pdb', selection1)
    print('Output modified {} as {}'.format(pdb_id1, file_out_name))
    sys.stdout.flush()

    with open(pdb_id1 + '.pdb') as f_in:
        with open(file_out_name, 'w') as f_out:
            for line in f_in:
                # PDB records are fixed-width, so fields are read by column
                # instead of by whitespace splitting: that survives blank
                # lines, missing chain ids and fields that touch each other.
                is_ca_atom = (line[0:6].strip() == 'ATOM'
                              and line[12:16].strip() == 'CA')
                if not is_ca_atom:
                    f_out.write(line)
                    continue

                # Residue number plus insertion code (columns 23-27).
                resi = line[22:27].strip()
                if resi not in alignment:
                    # As before, a CA atom without a value is reported and
                    # not copied to the output.
                    print('error: key {} not in alignment dictionary'.format(resi))
                    continue

                # Replace exactly the temperature-factor field (columns
                # 61-66) with a fixed-width value so the remaining columns
                # keep their positions.
                record = line.rstrip('\r\n').ljust(66)
                f_out.write(record[:60] + '%6.2f' % alignment[resi] + record[66:] + '\n')
def super_all(target=None, mobile_selection='name ca', target_selection='name ca', cutoff=2, cycles=5, cgo_object=0):
    """
    Superimpose every loaded object onto one target using the "super" algorithm.

    usage:
      super_all [target][target_selection=name ca][mobile_selection=name ca][cutoff=2][cycles=5][cgo_object=0]

    Arguments:
    target           -- name of the object all others are superimposed on
    mobile_selection -- atoms of each mobile object used for the fit
                        (default: all C-alpha atoms)
    target_selection -- atoms of the target used for the fit
                        (default: all C-alpha atoms)
    cutoff           -- outlier rejection cutoff passed to cmd.super
    cycles           -- number of refinement cycles passed to cmd.super
    cgo_object       -- if 1, the superpositions are recorded in an
                        alignment object named super_on_<target>

    The RMSD of every object is printed, first in object order and then
    sorted by RMSD.

    Raises ValueError if target is not the name of a loaded object.

    Example:
      super_all target=name1, mobile_selection=c. b & n. n+ca+c+o,target_selection=c.a & n. n+ca+c+o
    """
    # The cutoff is a distance, so fractional values must survive the
    # conversion (int() would truncate 2.5 to 2 or reject the string "2.5").
    cutoff = float(cutoff)
    cycles = int(cycles)
    cgo_object = int(cgo_object)

    object_list = cmd.get_names()
    if target not in object_list:
        raise ValueError("super_all: target %r is not a loaded object" % (target,))
    object_list.remove(target)

    # Only pass object= when an alignment object was requested.
    extra = {'object': 'super_on_%s' % target} if cgo_object else {}

    rmsd = {}
    rmsd_list = []
    for name in object_list:
        rms = cmd.super('%s & %s' % (name, mobile_selection),
                        '%s & %s' % (target, target_selection),
                        cutoff=cutoff, cycles=cycles, **extra)
        rmsd[name] = rms[0]
        rmsd_list.append((rms[0], name))
    rmsd_list.sort()

    # Report the final RMS values, in object order and then sorted.
    print("Superimposing against: %s" % target)
    for name in object_list:
        print("%s: %6.3f" % (name, rmsd[name]))
    for entry in rmsd_list:
        print("%6.3f %s" % entry)
def superpose_in_pymol(self, mobile_selection, fixed_selection, save_superposed_structure=True, output_directory=None):
    """
    Superpose 'mobile' to 'fixed' in PyMOL.

    When save_superposed_structure is true the mobile selection is saved as
    '<mobile_selection>.pdb' in output_directory, which falls back to
    self.pymod.structures_dirpath when it is not given.
    """
    target_directory = output_directory or self.pymod.structures_dirpath

    # 'super' is sequence-independent; PyMOL 0.99 does not have 'cmd.super',
    # so 'align' is used there instead.
    fit = cmd.super if hasattr(cmd, "super") else cmd.align
    fit(mobile_selection, fixed_selection)

    if not save_superposed_structure:
        return
    structure_path = os.path.join(target_directory, mobile_selection + ".pdb")
    cmd.save(structure_path, mobile_selection)
def load_all(fl, template):
    """Load *template* and every file in *fl*, superimposing each file on the template.

    Arguments:
    fl       -- iterable of structure file names
    template -- file name of the reference structure

    The value returned by cmd.super is printed for every file.
    """
    import os

    def object_name(path):
        # Object names must not carry the directory part of the path;
        # splitting on '.pdb' alone would keep it and the name would then
        # not match the loaded object.
        return os.path.basename(path).split('.pdb')[0]

    # Objects are named explicitly so the names used for the superposition
    # are guaranteed to match what was loaded.
    template_ID = object_name(template)
    cmd.load(template, template_ID)

    for f in fl:
        f_ID = object_name(f)
        cmd.load(f, f_ID)
        align = cmd.super("%s" % f_ID, "%s" % template_ID)
        print(align)
def load_all(fl, template):
    """Superimpose every file in *fl* on *template* and save the fitted copies.

    Arguments:
    fl       -- sequence of structure file names
    template -- file name of the reference structure

    For every entry the part of the file name before '.pdb' is the stem;
    '<stem>.pdb' is loaded and superimposed on the template, and
    '<stem>_aln.pdb' is written once all superpositions are done.
    """
    import os

    def object_name(stem):
        # Object names must not include the directory part of the path.
        return os.path.basename(stem)

    template_ID = object_name(template.split('.pdb')[0])
    cmd.load(template, template_ID)

    stems = [f.split('.pdb')[0] for f in fl]
    for f, f_ID in zip(fl, stems):
        name = object_name(f_ID)
        cmd.load('.'.join((f_ID, 'pdb')), name)
        cmd.super("%s" % name, "%s" % template_ID)
        # Report the file that was actually processed (previously the
        # variable leaked from the list comprehension, i.e. always the last
        # file, was printed).
        print("%s %s" % (f, f_ID))

    for f_ID in stems:
        cmd.save("%s_aln.pdb" % f_ID, object_name(f_ID))
def measure_rmsd(pdbfile1, pdbfile2, mode='align'):
    """
    Measure the RMSD between two PDB files using PyMOL.

    Arguments:
    pdbfile1 -- path of the reference structure
    pdbfile2 -- path of the structure that is fitted onto pdbfile1
    mode     -- 'align' (default, good for structurally similar PDBs),
                'cealign' or 'super'

    Returns the RMSD reported by the chosen PyMOL command.

    Raises ValueError if mode is not one of the supported names.

    All objects in the PyMOL session are deleted before returning, also
    when loading or fitting fails.
    """
    supported = ('align', 'cealign', 'super')
    if mode not in supported:
        # Fail early with a clear message instead of reaching the return
        # statement with RMSD unbound.
        raise ValueError("unknown mode %r; expected one of %s"
                         % (mode, ', '.join(supported)))

    pymol.finish_launching(["pymol", "-qc"])
    try:
        cmd.load(pdbfile1, 'pdb_file1')
        cmd.load(pdbfile2, 'pdb_file2')
        if mode == 'align':
            RMSD = cmd.align('pdb_file2', 'pdb_file1')[0]
        elif mode == 'cealign':
            # cealign returns a dictionary rather than a tuple.
            RMSD = cmd.cealign('pdb_file2', 'pdb_file1')['RMSD']
        else:
            RMSD = cmd.super('pdb_file2', 'pdb_file1')[0]
    finally:
        cmd.delete("*")
    return RMSD
# running by command
# pymol -cqr this_script.py
from pymol import cmd
import os, glob, re

# working dir of the script, also output dir for prepare_receptor4.py
pdbqt_dir = 'receptors/wt-ensemble'

# relative paths from the working dir above
reference_structure = "../../structures/NA_2HU4.pdb"
pdb_dir = '../../clustering/WT/tmp/'
pdb_aligned_dir = '../../clustering/WT/ensemble/'

cmd_PREPARE_RECEPTOR = ('pythonsh /home/Ubuntu/tk/Programs/mgltools_i86Linux2_1.5.4/'
                        'MGLToolsPckgs/AutoDockTools/Utilities24/prepare_receptor4.py')

os.chdir(pdbqt_dir)
cmd.load(reference_structure, 'ref')

# Superimpose every cluster representative on the reference, save the
# aligned copy and convert it with prepare_receptor4.py.
for f in glob.glob(pdb_dir + '/cluster-????.pdb'):
    cmd.load(f, 'obj1')
    # Same file name, but inside the directory for aligned structures
    # (previously done with a regex whose "\/" is an invalid string escape).
    new_pdb = pdb_aligned_dir + os.path.basename(f)
    cmd.super('obj1', 'ref')
    cmd.save(new_pdb, 'obj1')
    cmd.delete('obj1')
    os.system("%s -r %s" % (cmd_PREPARE_RECEPTOR, new_pdb))
def ColorByDisplacementAll(objSel1, objSel2, super1='all', super2='all', doColor="True", doAlign="True", AlignedWhite='yes'):
    """Store per-atom displacement values in the B-factors of two objects and colour by them.

    Arguments:
    objSel1, objSel2 -- the two objects/selections to compare
    super1, super2   -- sub-selections of each object used for cmd.super
    doColor          -- if true (per strTrue), show sticks and colour by B-factor
    doAlign          -- if true (per strTrue), copy the superposition matrix
                        of the temporary copy onto objSel1
    AlignedWhite     -- 'yes': only atoms outside the alignment object get a
                        displacement value, atoms used for the alignment keep
                        b = -0.2 and are coloured white; any other value:
                        all atoms are processed

    The displacement values themselves are computed by
    displacementUpdateBAll on temporary copies and then written back to the
    original objects atom by atom.
    """
    # Temporary names; names starting with "__" are normally hidden by PyMOL.
    tObj1, tObj2, aln = "__tempObj1", "__tempObj2", "__aln"

    # Work on copies, and create an alignment object `aln` that records
    # which atoms were paired between the two structures.  Super is much
    # faster than align: http://www.pymolwiki.org/index.php/Super
    cmd.create(tObj1, objSel1)
    cmd.create(tObj2, objSel2)
    cmd.super(tObj1 + ' and ' + str(super1), tObj2 + ' and ' + str(super2), object=aln)
    if strTrue(doAlign):
        # Modify the original matrix of object1 from the alignment.
        cmd.matrix_copy(tObj1, objSel1)

    # Flag every atom of the originals with b = -0.2 so that atoms which
    # receive no displacement value can be identified later on.
    cmd.alter(objSel1 + " or " + objSel2, "b=-0.2")
    cmd.alter(tObj1 + " or " + tObj2, "chain='A'")
    cmd.alter(tObj1 + " or " + tObj2, "segi='A'")

    # Update PyMOL internal representations; one of these should do the trick.
    cmd.refresh()
    cmd.rebuild()
    cmd.sort(tObj1)
    cmd.sort(tObj2)

    # Atoms to process: those outside the alignment object, or everything.
    if AlignedWhite == 'yes':
        atoms1 = tObj1 + " and not " + aln
        atoms2 = tObj2 + " and not " + aln
    else:
        atoms1, atoms2 = tObj1, tObj2

    # Collect (resi, name) of the atoms to process.
    stored.alnAres, stored.alnBres = [], []
    cmd.iterate(atoms1, "stored.alnAres.append((resi, name))")
    cmd.iterate(atoms2, "stored.alnBres.append((resi, name))")

    # Change the B-factors for EACH object.
    displacementUpdateBAll(tObj1, stored.alnAres, tObj2, stored.alnBres)

    # Store the NEW B-factors.
    stored.alnAnb, stored.alnBnb = [], []
    cmd.iterate(atoms1, "stored.alnAnb.append(b)")
    cmd.iterate(atoms2, "stored.alnBnb.append(b)")

    # Get rid of all intermediate objects.
    cmd.delete(tObj1)
    cmd.delete(tObj2)
    cmd.delete(aln)

    # Assign the just stored NEW B-factors to the original objects.
    print("Sooon ready. 1 more minute")
    for (resi, name), b in zip(stored.alnAres, stored.alnAnb):
        cmd.alter(objSel1 + " and resi " + str(resi) + " and name " + str(name), "b = " + str(b))
    for (resi, name), b in zip(stored.alnBres, stored.alnBnb):
        cmd.alter(objSel2 + " and resi " + str(resi) + " and name " + str(name), "b = " + str(b))
    cmd.rebuild()
    cmd.refresh()
    cmd.sort(objSel1)
    cmd.sort(objSel2)

    # Provide some useful information.
    stored.allRMSDval = stored.alnAnb + stored.alnBnb
    print("\nColorByDisplacementAll completed successfully.")
    if stored.allRMSDval:
        # The maximum may belong to either object, so it is looked up in the
        # combined residue list (indexing the list of object 1 alone picked
        # the wrong residue or failed when the maximum was in object 2).
        all_residues = stored.alnAres + stored.alnBres
        max_value = max(stored.allRMSDval)
        max_residue = all_residues[stored.allRMSDval.index(max_value)]
        print("The MAXIMUM Displacement is: " + str(max_value) + " residue " + str(max_residue))

    if strTrue(doColor):
        # Showcase what we did.
        cmd.show("sticks", objSel1 + " or " + objSel2)
        # Atoms that received no value still have their B-factors as "-0.2".
        cmd.select("notUsedForAln", "b = -0.2")
        cmd.color("white", "notUsedForAln")
        # Residues not in both pdb files have their B-factors as "-0.01".
        cmd.select("ResNotInBothPDB", "b = -0.01")
        cmd.color("black", "ResNotInBothPDB")
        # Colour the remaining atoms according to their displacement values.
        cmd.spectrum("b", 'rainbow', "((" + objSel1 + ") or (" + objSel2 + " )) and not (notUsedForAln or ResNotInBothPDB)")
        # Remove the helper selections.  To keep them, comment out the
        # cmd.delete lines and leave only the cmd.disable lines.
        cmd.disable("notUsedForAln")
        cmd.delete("notUsedForAln")
        cmd.disable("ResNotInBothPDB")
        cmd.delete("ResNotInBothPDB")
        print("\nObjects are now colored by C-alpha displacement deviation.")
        print("Blue is minimum and red is maximum...")
        print("White is those residues used in the alignment algorithm. Can be turned off in top of algorithm.")
        print("Black is residues that does not exist in both files...")
# makePDBwithConservedWaters(ProteinsList, temp_dir, outdir, save_sup_files)
#
# Purpose: find the water molecules that are conserved across the protein
# chains in ProteinsList and write them out for the selected chain
# (ProteinsList.selectedPDBChain).
#
# Steps visible in the code below:
#   1. Every chain is loaded from temp_dir into a PyMOL object named
#      'cwm_<protein>'; hydrogens are removed and 'dod' residues are renamed
#      to 'HOH'.
#   2. The C-alpha atoms of every chain after the first are superimposed on
#      the first chain with cmd.super.
#   3. Each chain and its waters ('resname hoh') are saved to
#      'cwm_<protein>.pdb' and 'cwm_<protein>_Water.pdb' in temp_dir.
#   4. Depending on ProteinsList.refinement ('Mobility' or
#      'Normalized B-factor'), chains other than the selected one are removed
#      from ProteinsList.proteins when okMobility / okBfactor rejects their
#      water file.
#   5. If save_sup_files is true, files matching 'cwm_????_?.pdb' in temp_dir
#      are copied to <outdir>/<selectedPDBChain>.
#   6. The water coordinates of all chains are clustered with
#      hcluster.fclusterdata (criterion 'distance', threshold
#      ProteinsList.inconsistency_coefficient, method
#      ProteinsList.clustering_method).
#   7. For every cluster the degree of conservation is the number of distinct
#      chains in the cluster divided by len(ProteinsList); clusters at or
#      above ProteinsList.probability are written to
#      '<selectedPDBChain>_clusterPresence.txt'.
#   8. The conserved waters of the selected chain are written to
#      'cwm_<chain>_ConservedWatersOnly.pdb', merged with the chain into
#      'cwm_<chain>_withConservedWaters.pdb', copied to the output directory
#      and shown through displayInPyMOL.
#
# The first six characters of a water id are used as the chain identifier
# and the characters from index 7 on as the atom number.
#
# NOTE(review): this definition has lost its line breaks and indentation;
# the block structure must be restored from the upstream source before the
# code can run.  The code text below is left untouched for that reason.
# NOTE(review): fcDic.has_key(...) and conservedWaterDic.has_key(...) exist
# only on Python 2 dictionaries.
# NOTE(review): selectedPDBChainIn is opened for reading, but no matching
# close() call is visible in this block.
# NOTE(review): the locals `length`, `waterIDCoordinates` and
# `clusterPresenceDic` are assigned but not read anywhere in this block.
def makePDBwithConservedWaters(ProteinsList, temp_dir, outdir, save_sup_files): logger.info( 'Minimum desired degree of conservation is : %s' % ProteinsList.probability ) cmd.delete('cwm_*') logger.info( 'Loading all pdb chains ...' ) for protein in ProteinsList: cmd.load(os.path.join(temp_dir, protein.pdb_filename),'cwm_%s' % protein.pdb_id) cmd.remove('(hydro) and cwm_%s' % protein.pdb_id) cmd.select('dods','resn dod') cmd.alter('dods', 'resn="HOH"') cmd.create('cwm_%s' % protein, 'cwm_%s & chain %s' % (protein.pdb_id, protein.chain)) cmd.delete( 'cwm_%s' % protein.pdb_id ) logger.info( 'Superimposing all pdb chains ...' ) for protein in ProteinsList[1:]: logger.info( 'Superimposing %s' % protein ) cmd.super('cwm_%s////CA' % protein, 'cwm_%s////CA' % ProteinsList[0]) cmd.orient( 'cwm_%s' % ProteinsList[0] ) logger.info( 'Creating new, water only, pymol objects for each pdb chain ...' ) for protein in ProteinsList: cmd.create('cwm_%s_Water' % protein, 'cwm_%s & resname hoh' % protein) logger.info( 'Storing water molecules and proteins in separate pdb files for each pdb chain ...' ) for protein in ProteinsList: cmd.save(os.path.join(temp_dir, 'cwm_%s.pdb' % protein), 'cwm_%s' % protein) cmd.save(os.path.join(temp_dir, 'cwm_%s_Water.pdb' % protein), 'cwm_%s_Water' % protein) cmd.delete('cwm_*') ### filter ProteinsList by mobility or normalized B factor cutoff logger.debug( 'Protein chains list is %s proteins long.' % len(ProteinsList.proteins) ) if ProteinsList.refinement != 'No refinement': length = len(ProteinsList.proteins) if ProteinsList.refinement == 'Mobility': logger.info( 'Filtering water oxygen atoms by mobility ...' 
) for protein in reversed(ProteinsList.proteins): if str(protein) != str(ProteinsList.selectedPDBChain): if not okMobility(os.path.join(temp_dir, 'cwm_%s_Water.pdb' % protein)): ProteinsList.proteins.remove(protein) if ProteinsList.refinement == 'Normalized B-factor': logger.info( 'Filtering water oxygen atoms by Normalized B-factor' ) for protein in reversed(ProteinsList.proteins): if str(protein) != str(ProteinsList.selectedPDBChain): if not okBfactor(os.path.join(temp_dir, 'cwm_%s_Water.pdb' % protein)): ProteinsList.proteins.remove(protein) logger.debug( 'filtered proteins chains list is %s proteins long :' % len(ProteinsList.proteins) ) """ Filtered ProteinsList """ selectedPDBChain = str(ProteinsList.selectedPDBChain) if not os.path.exists(os.path.join(outdir,selectedPDBChain)): os.mkdir(os.path.join(outdir,selectedPDBChain)) if save_sup_files: for file in glob.glob(os.path.join(temp_dir, 'cwm_????_?.pdb')): shutil.copy(file, os.path.join(outdir,selectedPDBChain)) # Only if ProteinsList has more than one protein if len(ProteinsList.proteins) > 1: water_coordinates = list() waterIDCoordinates = {} water_ids = list() for protein in ProteinsList: protein.calculate_water_coordinates( temp_dir ) logger.debug( 'Protein %s has %i coordinates.' % (protein, len(protein.water_coordinates))) water_coordinates += protein.water_coordinates water_ids += protein.water_ids if water_coordinates: # Only if there are any water molecules list of similar protein structures. logger.info( 'Number of water molecules to cluster: %i' % len(water_coordinates) ) if len(water_coordinates) != 1: # Only if the total number of water molecules to cluster is less than 50000. if len(water_coordinates) < 50000: cwm_count = 0 logger.info( 'Clustering the water coordinates ...' 
) # The clustering returns a list of clusternumbers # Available optoins are: single, complete, average FD = hcluster.fclusterdata(water_coordinates, t = ProteinsList.inconsistency_coefficient, criterion='distance', metric='euclidean', depth=2, method= ProteinsList.clustering_method ) FDlist = list(FD) fcDic = {} for a,b in zip(water_ids,FDlist): if fcDic.has_key(b): fcDic[b].append(a) else: fcDic[b]=[a] conservedWaterDic = {} clusterPresenceDic = {} clusterPresenceOut = open(os.path.join( outdir, selectedPDBChain, '%s_clusterPresence.txt' % selectedPDBChain ),'w') clusterPresenceOut.write('Water Conservation Score'+'\t') proteins_numbers = {} l=0 for i in ProteinsList.proteins: proteins_numbers[(str(i))]=l l += 1 clusterPresenceOut.write('%s' % i +'\t') clusterPresenceOut.write('\n') logger.info( 'Extracting conserved waters from clusters ...' ) logger.debug( 'Start iterating over all clusters ...') for clusterNumber, waterMols in fcDic.items(): logger.debug('Cluster No.: %s - Included Water Molecules: %s' % ( clusterNumber, ', '.join( waterMols ) )) waterMolsNumber = len(waterMols) uniquePDBs = set([a[:6] for a in waterMols]) uniquePDBslen = len(uniquePDBs) # Update waterMols if there are two waterMolecules from same PDB is present in waterMols if uniquePDBslen < waterMolsNumber: logger.debug('Removed water molecules from the same protein in one cluster.') waterMols.sort() PDBs = [i[:6] for i in waterMols] PDBsCounter = collections.Counter(PDBs) for i in PDBsCounter.keys(): if PDBsCounter.get(i) > 1: for j in reversed(waterMols): if j[:6] == i: waterMols.remove(j) c = [] for i in range(len(ProteinsList.proteins)): for prot in proteins_numbers.keys(): if proteins_numbers[prot] == i: for d in waterMols: if prot == d[:6]: c.append(d) waterMols = c waterMolsNumber = len(waterMols) uniquePDBs = set([a[:6] for a in waterMols]) uniquePDBslen = len(uniquePDBs) probability = float(uniquePDBslen) / len(ProteinsList) logger.info( 'Degree of conservation is: %s' % probability 
) if probability >= ProteinsList.probability: clusterPresenceOut.write(str(probability)+'\t') k=0 for waterMol in waterMols: sr_no = proteins_numbers[waterMol[:6]] if sr_no == k: clusterPresenceOut.write(str(waterMol[7:])+'\t') k += 1 elif k < sr_no: for j in range(sr_no-k): clusterPresenceOut.write('NoWater'+'\t') k += 1 clusterPresenceOut.write(str(waterMol[7:])+'\t') k += 1 for j in range(len(ProteinsList.proteins)-k): clusterPresenceOut.write('NoWater'+'\t') clusterPresenceOut.write('\n') if selectedPDBChain in uniquePDBs: cwm_count += 1 for waterMol in waterMols: if conservedWaterDic.has_key(waterMol[:6]): conservedWaterDic[waterMol[:6]].append('_'.join([waterMol[7:], str(probability)])) else: conservedWaterDic[waterMol[:6]] = ['_'.join([waterMol[7:], str(probability)])] clusterPresenceOut.close() logger.debug( 'conservedWaterDic is: ') for pdb_id, atoms in conservedWaterDic.items(): logger.debug('Oxygen atom numbers for %s: %s' % ( pdb_id, ', '.join( atoms ) )) if selectedPDBChain in conservedWaterDic.keys(): # save pdb file of only conserved waters for selected pdb atomNumbersProbDic = {} atomNumbers_Prob = conservedWaterDic[selectedPDBChain] logger.info( """Degree of conservation for each conserved water molecule is stored in cwm_%s_withConservedWaters.pdb with the format 'atomNumber'_'DegreeOfConservation'""" % ( selectedPDBChain ) ) for probability in atomNumbers_Prob: atom, prob = probability.split('_') atomNumbersProbDic[ atom ] = float( prob ) atomNumbers = atomNumbersProbDic.keys() selectedPDBChainConservedWatersOut = open(os.path.join(temp_dir, 'cwm_'+selectedPDBChain+'_ConservedWatersOnly.pdb'),'w+') selectedPDBChainIn = open(os.path.join(temp_dir, 'cwm_'+selectedPDBChain+'_Water.pdb')) for line in selectedPDBChainIn: if line.startswith('HETATM'): if str(int(line[22:30])) in atomNumbers: selectedPDBChainConservedWatersOut.write( line ) selectedPDBChainConservedWatersOut.write('END') selectedPDBChainConservedWatersOut.close() # add conserved waters 
to pdb file cmd.delete('cwm_*') cmd.load( os.path.join(temp_dir, 'cwm_%s.pdb' % selectedPDBChain) ) cmd.load( os.path.join(temp_dir, 'cwm_%s_ConservedWatersOnly.pdb' % selectedPDBChain) ) cmd.remove( 'resname hoh and '+'cwm_%s' % selectedPDBChain ) cmd.save( os.path.join(temp_dir, 'cwm_%s_withConservedWaters.pdb' % selectedPDBChain), 'cwm_*') cmd.delete('cwm_*') shutil.copy( os.path.join(temp_dir, 'cwm_%s_withConservedWaters.pdb' % selectedPDBChain), os.path.join(outdir,selectedPDBChain)) shutil.copy(os.path.join(temp_dir, 'cwm_%s.pdb' % selectedPDBChain),os.path.join(outdir,selectedPDBChain)) if os.path.exists(os.path.join(outdir, selectedPDBChain, 'cwm_%s_withConservedWaters.pdb' % selectedPDBChain)): logger.info( "%s structure has %s conserved water molecules." % (selectedPDBChain,cwm_count)) displayInPyMOL(os.path.join(outdir, selectedPDBChain), 'cwm_%s' % selectedPDBChain, atomNumbersProbDic) logger.info("""PDB file of query protein with conserved waters "cwm_%s_withConservedWaters.pdb" and logfile (pywater.log) is saved in %s""" % ( selectedPDBChain, os.path.abspath(outdir))) else: logger.info( "%s has no conserved waters" % selectedPDBChain ) else: logger.error( "%s has too many waters to cluster. Memory is not enough..." % selectedPDBChain ) else: logger.info( "%s has only one water molecule..." % selectedPDBChain ) else: logger.info( "%s and other structures from the same cluster do not have any water molecules." % selectedPDBChain ) else: logger.error( "%s has only one PDB structure. We need atleast 2 structures to superimpose." % selectedPDBChain )
def color_by_mutation(obj1, obj2, waters=0, labels=0):
    '''
DESCRIPTION

    Creates an alignment of two proteins and superimposes them.
    Aligned residues that are different in the two (i.e. mutations) are
    highlighted and colored according to their difference in the BLOSUM90
    matrix.  Is meant to be used for similar proteins, e.g. close homologs
    or point mutants, to visualize their differences.

USAGE

    color_by_mutation selection1, selection2 [,waters [,labels ]]

ARGUMENTS

    obj1: object or selection

    obj2: object or selection

    waters: bool (0 or 1). If 1, waters are included in the view, colored
            differently for the both input structures.
            default = 0

    labels: bool (0 or 1). If 1, the possibly mutated sidechains are
            labeled by their chain, name and id
            default = 0

NOTES

    Raises CmdException when no mutations are found; the temporary
    alignment object is removed in that case as well.

EXAMPLE

    color_by_mutation protein1, protein2

SEE ALSO

    super
    '''
    from pymol import stored, CmdException

    if cmd.count_atoms(obj1) == 0:
        print('%s is empty' % obj1)
        return
    if cmd.count_atoms(obj2) == 0:
        print('%s is empty' % obj2)
        return
    waters = int(waters)
    labels = int(labels)

    # Parenthesised forms, so that selections containing "or" keep their
    # meaning when combined with further "and" terms.
    sel1 = '(%s)' % obj1
    sel2 = '(%s)' % obj2
    both = '%s or %s' % (sel1, sel2)

    # Temporary alignment object.
    aln = '__aln'

    # First, an alignment with 0 cycles: no atoms are rejected, which
    # maximizes the number of aligned residues.  This is essentially done
    # to get a sequence alignment.
    cmd.super(obj1, obj2, object=aln, cycles=0)
    # Then superimpose with the default parameters to get the best
    # structural alignment.
    cmd.super(obj1, obj2)

    # Store residue names, residue ids and chains of the aligned residues.
    stored.resn1, stored.resn2 = [], []
    stored.resi1, stored.resi2 = [], []
    stored.chain1, stored.chain2 = [], []
    aligned_ca1 = sel1 + ' and name CA and ' + aln
    aligned_ca2 = sel2 + ' and name CA and ' + aln
    for attribute in ('resn', 'resi', 'chain'):
        cmd.iterate(aligned_ca1, 'stored.%s1.append(%s)' % (attribute, attribute))
        cmd.iterate(aligned_ca2, 'stored.%s2.append(%s)' % (attribute, attribute))

    mutant_selection = ''
    non_mutant_selection = 'none or '
    colors = []
    pair_template = '((%s and resi %s and chain %s) or (%s and resi %s and chain %s)) or '

    # Loop over the aligned residues.
    for n1, n2, i1, i2, c1, c2 in zip(stored.resn1, stored.resn2,
                                      stored.resi1, stored.resi2,
                                      stored.chain1, stored.chain2):
        # Take care of 'empty' chain names.
        if c1 == '':
            c1 = '""'
        if c2 == '':
            c2 = '""'
        pair = pair_template % (sel1, i1, c1, sel2, i2, c2)
        if n1 == n2:
            non_mutant_selection += pair
        else:
            mutant_selection += pair
            # Colour for the similarity of the two residues according to
            # the BLOSUM90 matrix.
            c = getBlosum90ColorName(n1, n2)
            colors.append((c, '%s and resi %s and chain %s and elem C' % (sel2, i2, c2)))

    if mutant_selection == '':
        # Do not leave the temporary alignment object behind.
        cmd.delete(aln)
        print(' Error: No mutations found')
        raise CmdException

    # Create selections (the trailing " or " is cut off).
    cmd.select('mutations', mutant_selection[:-4])
    cmd.select('non_mutations', non_mutant_selection[:-4])
    cmd.select('not_aligned', '(%s) and not mutations and not non_mutations' % both)

    # Create the view and coloring.
    cmd.hide('everything', both)
    cmd.show('cartoon', both)
    cmd.show('lines', '(%s) and ((non_mutations or not_aligned) and not name c+o+n)' % both)
    cmd.show('sticks', '(%s) and mutations and not name c+o+n' % both)
    cmd.color('gray', 'elem C and not_aligned')
    cmd.color('white', 'elem C and non_mutations')
    cmd.color('blue', 'elem C and mutations and %s' % sel1)
    for (col, sel) in colors:
        cmd.color(col, sel)

    cmd.hide('everything', '(hydro) and (%s)' % both)
    cmd.center(both)
    if labels:
        cmd.label('mutations and name CA', '"(%s-%s-%s)"%(chain, resi, resn)')
    if waters:
        cmd.set('sphere_scale', '0.1')
        cmd.show('spheres', 'resn HOH and (%s)' % both)
        cmd.color('red', 'resn HOH and %s' % sel1)
        cmd.color('salmon', 'resn HOH and %s' % sel2)

    print((' Mutations are highlighted in blue and red. '
           'All mutated sidechains of %s are colored blue, '
           'the corresponding ones from %s are colored on a spectrum '
           'from blue to red according to how similar the two amino acids are '
           '(as measured by the BLOSUM90 substitution matrix). '
           'Aligned regions without mutations are colored white. '
           'Regions not used for the alignment are gray. '
           'NOTE: There could be mutations in the gray regions '
           'that were not detected.') % (obj1, obj2))

    cmd.delete(aln)
    cmd.deselect()
## Fetch the pdb_ids in pdb_id_list
print('Fetch pdb_id files.')
sys.stdout.flush()
cmd.reinitialize()
for pdb_id in pdb_id_list:
    # "async" became a reserved word in Python 3.7, so the legacy keyword is
    # passed through a dictionary; this form parses on every Python version.
    cmd.fetch(pdb_id, **{'async': 0})
    cmd.disable(pdb_id)

## Superimpose every unique pair of pdb_ids and store the RMSD of each pair
## in rmsd_dict, keyed by the (first, second) pdb_id tuple
print('Compute pairwise RMSD values and store in dictionary.')
sys.stdout.flush()
for i, first_id in enumerate(pdb_id_list[:-1]):
    for second_id in pdb_id_list[i + 1:]:
        rmsd = cmd.super(first_id, second_id)
        assert (len(rmsd) > 0)
        rmsd_dict[(str(first_id), str(second_id))] = rmsd[0]

## Find the largest and smallest RMSD in the dictionary, and the pdb_id
## pairs that produced them
print('Find max and min RMSD pairs')
sys.stdout.flush()
max_rmsd = max(rmsd_dict.values())
assert (max_rmsd > 0)
max_rmsd_key = [k for k, v in rmsd_dict.items() if v == max_rmsd]
assert (len(max_rmsd_key) == 1)
min_rmsd = min(rmsd_dict.values())
assert (min_rmsd > 0)
min_rmsd_key = [k for k, v in rmsd_dict.items() if v == min_rmsd]
assert (len(min_rmsd_key) == 1)
def alignlattice(
    target,
    mobile,
    a,
    b,
    c,
    color1="blue",
    color2="red",
    name1="supercell1",
    name2="supercell2",
    prefix1="m",
    prefix2="n",
    withmates=1,
    cutoff=None,
):
    """
DESCRIPTION

    Align two lattices. This facilitates the comparison of lattice contacts.

USAGE

    alignlattice target, mobile, a, b, c, [color1/2, name1/2, prefix1/2, withmates, cutoff]

ARGUMENTS

    target = string: name of object to generate the first lattice. This
    lattice is generated aligned to the original coordinates.

    mobile = string: name of the second object. The generated lattice is
    transformed by the matrix that superposes mobile onto target.

    a, b, c = integer: repeat cell in x,y,z direction a,b,c times (required)

    color1/2 = string: color of unit cell cgo objects {default: blue/red}

    name1/2 = string: name of the cgo object to create {default: supercell1/2}

    prefix1/2 = string: prefix for the symmetry mates {default: m/n}

    withmates = bool: also create symmetry mates in displayed cells
    {default: 1}

    cutoff = int: restrict symmetry mates to within cutoff angstroms of the
    origin. Passed unchanged to supercell {default: None}

SEE ALSO

    show cell
    """
    # Both selections must resolve to exactly one object each.
    mobileobj = "(byobj (%s))" % mobile
    targetobj = "(byobj (%s))" % target
    for label, byobj in ((mobile, mobileobj), (target, targetobj)):
        if len(cmd.get_object_list(byobj)) != 1:
            print("Expected single object for %s" % label)
            return

    # Superpose a scratch copy of the mobile selection onto the target and
    # keep the resulting object matrix.  The scratch name is requested from
    # PyMOL so it cannot collide with (and later delete) a user object, and
    # the copy is removed even when the superposition fails.
    # TODO handle non-identity initial matrix
    mobilecopy = cmd.get_unused_name("alignlattice_tmp")
    cmd.create(mobilecopy, mobile)
    try:
        cmd.super(mobilecopy, target)
        final_mat = cmd.get_object_matrix(mobilecopy)
    finally:
        cmd.delete(mobilecopy)

    # Objects present before the lattices are built; used below so that only
    # newly created symmetry mates get recoloured.
    orig_objects = set(cmd.get_object_list())

    # Generate primary grid
    supercell(
        a,
        b,
        c,
        targetobj,
        color=color1,
        name=name1,
        withmates=withmates,
        prefix=prefix1,
        center=1,
        transformation=None,
        cutoff=cutoff,
    )
    # Generate rotated grid
    supercell(
        a,
        b,
        c,
        mobileobj,
        color=color2,
        name=name2,
        withmates=withmates,
        prefix=prefix2,
        center=1,
        transformation=final_mat,
        cutoff=cutoff,
    )

    # Colour the new objects whose names match each prefix.
    for prefix, color in ((prefix1, color1), (prefix2, color2)):
        created = set(cmd.get_object_list("(%s*)" % prefix)) - orig_objects
        for obj in created:
            cmd.color(color, obj)
def draw_axis(chA, chB, scale_factor=20, w=0.6, r1=1, g1=1, b1=1, r2=1, g2=0, b2=0):
    """Draw a CGO cylinder for the axis relating chA and chB and report on it.

    The cylinder runs through the point returned by nearest_point_to_axis,
    extending 3 * scale_factor times the direction-cosine vector to either
    side.  It has radius ``w`` and end colours (r1, g1, b1) / (r2, g2, b2),
    and is loaded under the value of the rotation angle in degrees.
    Pseudoatoms ``_1`` .. ``_4`` plus an angle or dihedral/distance
    measurement are created, and print_information is called with the
    collected values.  A copy of chA named 'working', superposed on chB, is
    left in the session.
    """
    T = transf_matrix(chA, chB)
    angle = angle_axis(chA, chB)
    angle_degrees = (angle * 180) / math.pi

    # Evaluate the direction cosines once instead of once per component.
    cosines = direction_cosines(chA, chB)
    axis1 = [cosines[0], cosines[1], cosines[2]]

    p = nearest_point_to_axis(chA, chB)
    x1, y1, z1 = [p[k] + (3 * scale_factor * axis1[k]) for k in range(3)]
    x2, y2, z2 = [p[k] - (3 * scale_factor * axis1[k]) for k in range(3)]
    obj = [
        cgo.CYLINDER, x1, y1, z1, x2, y2, z2, w, r1, g1, b1, r2, g2, b2, 0.0
    ]
    cmd.load_cgo(obj, angle_degrees)

    cmA = center_of_Mass(chA)
    # cmW is not assigned in this function; it is read from module scope.
    # NOTE(review): presumably set as a side effect of the alignment helpers
    # called above -- confirm.
    cmB = cmW

    # Vector between the two centres, its length, and its unit direction.
    vector = numpy.array([(cmB[0] - cmA[0]), (cmB[1] - cmA[1]),
                          (cmB[2] - cmA[2])])
    moduli_vector = numpy.linalg.norm(vector)
    vector_director = numpy.array([(cmB[0] - cmA[0]) / moduli_vector,
                                   (cmB[1] - cmA[1]) / moduli_vector,
                                   (cmB[2] - cmA[2]) / moduli_vector])

    pC_A = proyeccion_centroide(chA, chA, chB)
    pC_B = proyeccion_centroide_working(chA, chB)
    trans_vector = numpy.array([(pC_B[0] - pC_A[0]), (pC_B[1] - pC_A[1]),
                                (pC_B[2] - pC_A[2])])
    modu_tr = numpy.linalg.norm(trans_vector)

    # Dot product of the unit centre-to-centre vector with the axis, scaled
    # by 180/pi and rounded to two decimals; zero selects the first branch.
    rota_centroid_rad = numpy.dot(vector_director, axis1)
    rota_centroid = (rota_centroid_rad * 180) / math.pi
    rota_centroid_absol = round(numpy.absolute(rota_centroid), 2)

    if rota_centroid_absol == 0.00:
        p1, p2, p3 = '_1', '_2', '_3'
        cmd.pseudoatom(pos=[cmA[0], cmA[1], cmA[2]], object=p1)
        cmd.pseudoatom(pos=[pC_A[0], pC_A[1], pC_A[2]], object=p2)
        cmd.pseudoatom(pos=[cmB[0], cmB[1], cmB[2]], object=p3)
        cmd.angle(None, p1, p2, p3)
        print_information(T, axis1, angle_degrees, moduli_vector, obj, x1, y1,
                          z1, x2, y2, z2, w, r1, g1, b1, r2, g2, b2)
    else:
        p1, p2, p3, p4 = '_1', '_2', '_3', '_4'
        cmd.pseudoatom(pos=[cmA[0], cmA[1], cmA[2]], object=p1)
        cmd.pseudoatom(pos=[pC_A[0], pC_A[1], pC_A[2]], object=p2)
        cmd.pseudoatom(pos=[pC_B[0], pC_B[1], pC_B[2]], object=p3)
        cmd.pseudoatom(pos=[cmB[0], cmB[1], cmB[2]], object=p4)
        cmd.dihedral(None, p1, p2, p3, p4)
        cmd.distance(None, p2, p3)
        print_information(T, axis1, angle_degrees, moduli_vector, obj, x1, y1,
                          z1, x2, y2, z2, w, r1, g1, b1, r2, g2, b2, modu_tr)

    cmd.create('working', chA)
    cmd.super('working', chB)
### Modify here
url = 'https://files.rcsb.org/download/5D98.pdb'
cmd.load(url, 'orig')

# Name the chains of the downloaded structure.  The selections are made
# before the model is loaded, so they only contain atoms of 'orig'.
cmd.select('origPA', 'chain A')
cmd.select('origPB1', 'chain B')
cmd.select('origPB2', 'chain C')
cmd.select('origPA_2', 'chain D')
cmd.select('origPB1_2', 'chain E')
cmd.select('origPB2_2', 'chain F')

# Remove chains D-F.
for duplicate in ('origPA_2', 'origPB1_2', 'origPB2_2'):
    cmd.remove(duplicate)

# Load the model as object 'S009'.  The object name used to be passed to
# str.format() by mistake, so no object called 'S009' existed for the
# superposition below.
cmd.load('S009PB2_{0}.pdb'.format(structure), 'S009')
cmd.super('S009', 'orig')

cmd.hide('everything')
cmd.show('cartoon')
# cmd.show('surface')
for c in ['origPA', 'origPB1', 'S009']:
    cmd.show('surface', c)
cmd.remove('origPB2')
cmd.color('palegreen', 'origPA')
cmd.color('palecyan', 'origPB1')
cmd.color('white', 'S009')
cmd.orient()
### To get PB2 only
# Start from a clean session with the rendering settings used for figures.
cmd.reinitialize()
cmd.set('bg_rgb', '[1,1,1]')  # white
cmd.set('antialias', '2')
cmd.set('ray_opaque_background', 'off')
cmd.set('depth_cue', 'off')

### Modify here
url = 'https://files.rcsb.org/download/4WSB.pdb'
cmd.load(url, 'bat')

# Selections are made before the model is loaded, so they only contain
# atoms of 'bat'.
cmd.select('batPA', 'chain A')
cmd.select('batPB1', 'chain B')
cmd.select('batPB2', 'chain C')

# Load the model as object 'S009'.  The object name used to be passed to
# str.format() by mistake, so no object called 'S009' existed for the
# superposition below.
cmd.load('S009PB2_{0}.pdb'.format(structure), 'S009')
cmd.super('S009', 'bat')

cmd.hide('everything')
cmd.show('cartoon')
# cmd.show('surface')
for c in ['batPA', 'batPB1', 'S009']:
    cmd.show('surface', c)
cmd.remove('batPB2')
cmd.color('palegreen', 'batPA')
cmd.color('palecyan', 'batPB1')
cmd.orient()

### To get PB2 only
cmd.hide('everything', 'bat')
cmd.hide('surface', 'S009')
def select_mutated(obj1, obj2, sele2=None):
    """
DESCRIPTION

    "select_mutated" finds and selects the amino acid differences in the
    second object when compared to the first object. It highlights the
    mutations in yellow sticks for easy identification. This function can
    also handle length changes by using PyMOL's built-in align method.

    Some parts of this script were taken from a similar script written by
    Christoph Malisi, located on the PyMOL wiki at
    www.pymolwiki.org/index.php/Color_By_Mutations

USAGE

    select_mutated wild_type_object_to_compare_to, mutant_object_to_select, selection_of_mutant_residues_to_limit_comparison_to

    NOTE: the third argument (sele2) is accepted but is not currently
    applied to the comparison.

EXAMPLES

    select_mutated 1xuu, 1ame, 1ame and chain g + 1ame and chain h
    select_mutated 1xuu, *
    """
    # obj2 may be given as a name prefix: use the first loaded object whose
    # name starts with it.
    for o in cmd.get_names("objects"):
        if o.startswith(obj2):
            obj2 = o
            break

    one_letter = {
        'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q',
        'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
        'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
        'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C',
        'MSE': 'M', 'ASX': 'N',
    }

    print("select_mutated called with object 1: " + obj1 + " and object 2: " + obj2)

    # use PyMOL to get a sequence alignment of the two objects (no refinement
    # cycles - just align the sequences), stored as object "alignment"
    cmd.align(obj2, obj1, object="alignment", cycles=0)

    # then use super for a sequence-independent, structure-based fit
    cmd.super(obj2, obj1)

    # Collect chain, resi and resn of every aligned C-alpha in both objects.
    # This assumes the aligned atoms are iterated in the same order for both
    # objects, which seems to be the case.
    stored.obj1_resi = []
    stored.obj2_resi = []
    stored.obj1_resn = []
    stored.obj2_resn = []
    stored.obj1_chain = []
    stored.obj2_chain = []
    cmd.iterate(obj1 + " and n. CA and alignment", "stored.obj1_resi.append( resi )")
    cmd.iterate(obj2 + " and n. CA and alignment", "stored.obj2_resi.append( resi )")
    cmd.iterate(obj1 + " and n. CA and alignment", "stored.obj1_resn.append( resn )")
    cmd.iterate(obj2 + " and n. CA and alignment", "stored.obj2_resn.append( resn )")
    cmd.iterate(obj1 + " and n. CA and alignment", "stored.obj1_chain.append( chain )")
    cmd.iterate(obj2 + " and n. CA and alignment", "stored.obj2_chain.append( chain )")

    sele_mutations_list = []
    wt_list = []
    mut_list = []
    mutations = []

    # loop over the aligned residues
    for resn1, resn2, resi2, ch2 in zip(stored.obj1_resn, stored.obj2_resn,
                                        stored.obj2_resi, stored.obj2_chain):
        # take care of 'empty' chain names
        if ch2 == '':
            ch2 = '""'
        if resn1 == resn2:
            continue
        # Residue names missing from the table become 'X' instead of
        # aborting the whole run with a KeyError.
        wt = one_letter.get(resn1, 'X')
        mut = one_letter.get(resn2, 'X')
        sele_mutations_list.append('/' + '/'.join([obj2, '', ch2, resi2]))
        wt_list.append(wt)
        mut_list.append(mut)
        mutations.append("%s:%s%s%s" % (ch2, wt, resi2, mut))

    if not mutations:
        print("No mutations found.")
        # Do not leave the temporary alignment object behind.
        cmd.delete("alignment")
        return

    selename = "mutated-" + obj2
    cmd.select(selename, " + ".join(sele_mutations_list))
    print("Mutations found: '%s'" % (mutations,))

    cmd.show("sticks", selename)
    cmd.hide("(" + selename + " and hydro)")
    cmd.color("yellow", selename + " and not (name N+CA+C+O)")
    util.cnc(selename)
    cmd.disable(selename)

    # Label each mutated C-alpha as <wild type><resi><mutant>; the label
    # expression is evaluated by PyMOL per atom.
    for sele, wt, mut in zip(sele_mutations_list, wt_list, mut_list):
        labelexp = '"' + wt + '%s' + mut + '"' + ' % (resi)'
        cmd.label(sele + " and n. ca", labelexp)

    # identify insertions also: everything in object2 that is not in the
    # alignment object (which includes mutated positions and inserts) and is
    # not in the mutated selection
    selename = "inserts-" + obj2
    cmd.select(selename, "(" + obj2 + " and not hydro and !(" + obj2 + " in alignment)) and !(mutated-" + obj2 + ")")
    cmd.color("orange", selename)
    util.cnc(selename)
    cmd.disable(selename)

    # clean up after ourselves
    cmd.delete("alignment")
def align_all(target=None,
              mobile_selection='name ca',
              target_selection='name ca',
              cutoff=2,
              cycles=5,
              cgo_object=0,
              method='align'):
    """
    Aligns all models in a list to one target

    usage:
    align_all [target][target_selection=name ca][mobile_selection=name ca][cutoff=2][cycles=5][cgo_object=0][method='align']

    where method can be align, super or cealign

    where target specifies the model id you want to align all others
    against, and target_selection, mobile_selection, cutoff and cycles are
    options passed to the align or super command.

    Options for method='align' or method='super':

    By default the selection is all C-alpha atoms and the cutoff is 2 and
    the number of cycles is 5.
    Setting cgo_object to 1 will cause the generation of an alignment object
    for each object. They will be named like align_<object>_on_<target>,
    where <object> and <target> will be replaced by the real object and
    target names.

    Example:
    align_all target=name1, mobile_selection=c. b & n. n+ca+c+o,target_selection=c. a & n. n+ca+c+o
    """
    # float() rather than int(): a fractional cutoff such as "2.5" typed on
    # the PyMOL command line is neither rejected nor truncated.
    cutoff = float(cutoff)
    cycles = int(cycles)
    cgo_object = int(cgo_object)

    object_list = cmd.get_names()
    object_list.remove(target)

    rmsd = {}
    rmsd_list = []
    target_sele = '%s & %s' % (target, target_selection)
    for mobile in object_list:
        mobile_sele = '%s & %s' % (mobile, mobile_selection)
        if method in ('align', 'super'):
            fit = cmd.align if method == 'align' else cmd.super
            options = {'cutoff': cutoff, 'cycles': cycles}
            if cgo_object:
                options['object'] = 'align_%s_on_%s' % (mobile, target)
            rms = fit(mobile_sele, target_sele, **options)
        elif method == 'cealign':
            # cealign takes the target first and returns a dict; reshape it
            # to look like the list returned by align/super.
            rmsdict = cmd.cealign(target_sele, mobile_sele)
            rms = [rmsdict['RMSD'], rmsdict['alignment_length'], 1, 0, 0]
        else:
            print("only 'align', 'super' and 'cealign' are accepted as methods")
            sys.exit(-1)

        rmsd[mobile] = (rms[0], rms[1])
        rmsd_list.append((mobile, rms[0], rms[1]))

    # Sort by RMSD.  A key function replaces the Python-2-only cmp argument.
    rmsd_list.sort(key=lambda entry: entry[1])

    # loop over dictionary and print out matrix of final rms values
    print("Aligning against:", target)
    for object_name in object_list:
        print("%s: %6.3f using %d atoms" %
              (object_name, rmsd[object_name][0], rmsd[object_name][1]))

    print("\nSorted from best match to worst:")
    for r in rmsd_list:
        print("%s: %6.3f using %d atoms" % r)
def run_salign_align3d(self, structures_to_align, output_file_name):
    """
    alignment.malign3d - align structures

    Runs MODELLER's SALIGN on the given structure elements, writes the
    alignment as "<output_file_name>.ali" (PIR) and ".aln" (clustal) in the
    alignments directory, and replaces each generated "*_fit.pdb" with the
    original PyMOL object superposed onto it.
    """
    shortcut_to_temp_files = os.path.join(
        self.pymod.current_project_dirpath, self.pymod.alignments_dirpath,
        output_file_name)

    # One (pymol selector, unique header, chain id) tuple per structure.
    struct_tup = [(element.get_pymol_selector(),
                   element.get_unique_index_header(),
                   element.get_chain_id())
                  for element in structures_to_align]

    # Change the working directory, so that the output files will be created
    # in the structures directory.  The finally clause guarantees the return
    # to the project directory even if MODELLER raises.
    os.chdir(self.pymod.structures_dirpath)
    try:
        modeller.log.minimal()
        env = modeller.environ()
        aln = modeller.alignment(env)

        for (pdb_file_name, code, chain) in struct_tup:
            mdl = modeller.model(env, file=pdb_file_name,
                                 model_segment=("FIRST:" + chain,
                                                "LAST:" + chain))
            aln.append_model(mdl, atom_files=pdb_file_name, align_codes=code)

        # Three tree-alignment passes with different feature weights; only
        # the last one writes the fitted structures.
        for (weights, write_fit, whole) in (
                ((1., 0., 0., 0., 1., 0.), False, True),
                ((1., 0.5, 1., 1., 1., 0.), False, True),
                ((1., 1., 1., 1., 1., 0.), True, False)):
            aln.salign(rms_cutoff=3.5,
                       normalize_pp_scores=False,
                       rr_file="$(LIB)/as1.sim.mat",
                       overhang=30,
                       gap_penalties_1d=(-450, -50),
                       gap_penalties_3d=(0, 3),
                       gap_gap_score=0,
                       gap_residue_score=0,
                       dendrogram_file=shortcut_to_temp_files + ".tree",
                       alignment_type="tree",
                       feature_weights=weights,
                       improve_alignment=True,
                       fit=True,
                       write_fit=write_fit,
                       write_whole_pdb=whole,
                       output="ALIGNMENT QUALITY")

        aln.write(file=shortcut_to_temp_files + ".ali",
                  alignment_format="PIR")

        aln.salign(rms_cutoff=1.0,
                   normalize_pp_scores=False,
                   rr_file='$(LIB)/as1.sim.mat',
                   overhang=30,
                   gap_penalties_1d=(-450, -50),
                   gap_penalties_3d=(0, 3),
                   gap_gap_score=0,
                   gap_residue_score=0,
                   dendrogram_file=shortcut_to_temp_files + '.tree',
                   alignment_type='progressive',
                   feature_weights=[0] * 6,
                   improve_alignment=False,
                   fit=False,
                   write_fit=True,
                   write_whole_pdb=False,
                   output='QUALITY')
    finally:
        # Returns back to the project dir from the project/Structures
        # directory.
        os.chdir(self.pymod.current_project_dirpath)

    # SALIGN does not superpose ligands. The generated "*_fit.pdb"
    # files are therefore ligandless. The following loop superposes
    # original structure to saligned structures, and replaces
    # "*_fit.pdb" files with the superposed liganded original structure.
    for pymod_element, (pdb_file_name_root, code, chain) in zip(
            structures_to_align, struct_tup):
        # Updates the name of the chains PDB files.
        fit_file_name = pdb_file_name_root + "_fit.pdb"
        fixed = os.path.join(self.pymod.structures_dirpath, fit_file_name)
        pymod_element.set_current_chain_file(
            os.path.join(self.pymod.current_project_dirpath,
                         self.pymod.structures_dirpath, fit_file_name))
        cmd.load(fixed, "salign_fixed_fit")
        if hasattr(cmd, "super"):  # super is sequence-independent
            cmd.super(pdb_file_name_root, "salign_fixed_fit")
        else:  # PyMOL 0.99 does not have cmd.super
            cmd.align(pdb_file_name_root, "salign_fixed_fit")
        cmd.set("retain_order", 1)
        cmd.save(fixed, pdb_file_name_root)  # quick-and-dirty
        cmd.set("retain_order", 0)
        cmd.delete("salign_fixed_fit")

    # Convert the PIR format output file into a clustal format file.
    record = SeqIO.parse(shortcut_to_temp_files + '.ali', "pir")
    SeqIO.write(record, shortcut_to_temp_files + ".aln", "clustal")
# Start from a clean session with the rendering settings used for figures.
cmd.reinitialize()
cmd.set('bg_rgb', '[1,1,1]')  # white
cmd.set('antialias', '2')
cmd.set('ray_opaque_background', 'off')
cmd.set('depth_cue', 'off')

### Modify here
url = 'https://files.rcsb.org/download/4UAD.pdb'
cmd.load(url, 'orig')

# Selections are made before the model is loaded, so they only contain
# atoms of 'orig'.
cmd.select('importin', 'chain A')
cmd.select('origPB2', 'chain E')

# Load the model as object 'S009'.  The object name used to be passed to
# str.format() by mistake, so no object called 'S009' existed for the
# superposition below.
cmd.load('S009PB2_{0}.pdb'.format(structure), 'S009')
cmd.super('S009', 'origPB2')

cmd.hide('everything')
cmd.show('cartoon')
# cmd.show('surface')
for c in ['importin', 'S009']:
    cmd.show('surface', c)
# cmd.remove('origPB2')
cmd.color('wheat', 'importin')
cmd.color('white', 'S009')
cmd.orient()

### To get PB2 only
cmd.hide('everything')
# cmd.hide('everything', 'origPB2')
# running by command
# pymol -cqr this_script.py
#
# Superposes every cluster-????.pdb in pdb_dir onto the reference structure,
# saves the fitted copy into pdb_aligned_dir and runs prepare_receptor4.py on
# it.

from pymol import cmd
import glob
import os
import re

# working dir of the script, also output dir for prepare_receptor4.py
pdbqt_dir = 'receptors/wt-ensemble'

# relative path from working dir above
reference_structure = "../../structures/NA_2HU4.pdb"
pdb_dir = '../../clustering/WT/tmp/'
pdb_aligned_dir = '../../clustering/WT/ensemble/'

cmd_PREPARE_RECEPTOR = (
    'pythonsh /home/Ubuntu/tk/Programs/mgltools_i86Linux2_1.5.4/'
    'MGLToolsPckgs/AutoDockTools/Utilities24/prepare_receptor4.py'
)

os.chdir(pdbqt_dir)
cmd.load(reference_structure, 'ref')

for f in glob.glob(pdb_dir + '/cluster-????.pdb'):
    cmd.load(f, 'obj1')
    # Same file name, placed in the output directory.  os.path replaces a
    # regex that relied on the invalid string escape "\/".
    new_pdb = os.path.join(pdb_aligned_dir, os.path.basename(f))
    cmd.super('obj1', 'ref')
    cmd.save(new_pdb, 'obj1')
    cmd.delete('obj1')
    os.system("%s -r %s" % (cmd_PREPARE_RECEPTOR, new_pdb))
cmd.delete(str.lower(pair[0])) for mobile in [ '{}.{}'.format(model, chain) for (model, chain) in args ][1:]: cmd.super(mobile, "{}.{}".format(args[0][0], args[0][1]),reset=1,transform=1,quiet=0) cmd.reset() cmd.save('alignment.cif') def create_subchain_object(pdbid, subchain): tempname = 'selection_{}.{}'.format(pdbid, subchain) cmd.select(tempname ,'m. {} and c. {}'.format( pdbid, subchain)) cmd.create('{}.{}'.format(pdbid,subchain), tempname) cmd.delete(tempname) if args.pmlxtnd: cmd.extend('create_subchain_object', create_subchain_object) cmd.extend('chain_align_save', chain_align_save); print("Added two commands to pymol.cmd") else: mcpairs=args.model_chain_tuples for pair in mcpairs: cmd.fetch(str.lower(pair[0])) create_subchain_object(pair[0], pair[1]) cmd.delete(str.lower(pair[0])) for mobile in [ '{}.{}'.format(model, chain) for (model, chain) in mcpairs ][1:]: cmd.super(mobile, "{}.{}".format(mcpairs[0][0], mcpairs[0][1]),reset=1,transform=1,quiet=0) cmd.reset() cmd.save('alignment.cif')
from pymol import cmd

# Superpose every loaded object onto the first entry of the object list.
objs = cmd.get_object_list()
for mobile in objs[1:]:
    cmd.super(mobile, objs[0])
def _superpose_rmsd(method, mobile_sele, target_sele, cutoff, cycles):
    """Fit mobile_sele onto target_sele with *method* and return the RMSD."""
    if method == 'align':
        return cmd.align(mobile_sele, target_sele, cutoff=cutoff,
                         cycles=cycles)[0]
    if method == 'super':
        return cmd.super(mobile_sele, target_sele, cutoff=cutoff,
                         cycles=cycles)[0]
    if method == 'cealign':
        # cealign takes the target first and returns a dict.
        # NOTE(review): this branch previously referenced undefined names and
        # could never run; confirm cealign behaves as wanted for the backbone
        # and all-atom selections.
        return cmd.cealign(target_sele, mobile_sele)['RMSD']
    print("only 'align', 'super' and 'cealign' are accepted as methods")
    sys.exit(-1)


def _read_pose_energies(path):
    """Return (score, fa_atr, fa_rep, fa_sol, fa_elec) read from *path*.

    Values come from the line starting with 'pose' that follows the
    '#BEGIN_POSE_ENERGIES_TABLE' marker (columns -1, 1, 2, 3 and 5).
    Returns None when the file has no such line.
    """
    in_table = False
    energies = None
    with open(path, 'r') as model:
        for line in model:
            fields = line.split()
            if not fields:
                continue
            if fields[0] == '#BEGIN_POSE_ENERGIES_TABLE':
                in_table = True
                continue
            if in_table and fields[0] == 'pose':
                energies = (float(fields[-1]), float(fields[1]),
                            float(fields[2]), float(fields[3]),
                            float(fields[5]))
    return energies


def align_allfiles(target='',
                   files='*.pdb',
                   res_sel='i. 1-',
                   outfile='',
                   name='',
                   cutoff=2.0,
                   cycles=5,
                   method='align'):
    """
    Aligns all models in a list of files to one target using the align,
    super or cealign method.

    Outputs: Rosetta energy terms, c-alpha, backbone, and all atom rmsd for
    each model, sorted by c-alpha rmsd. Rows are written to
    '<outfile>_score_vs_rmsd.csv' and also printed.

    usage:
    align_allfiles target=target name,files=name.pdb,res_sel='i. 1-',outfile='file name',cutoff=2,cycles=5,method=align

    where target specifies the file of the model you want to align all
    others against, and cutoff and cycles are options passed to the align
    command. You can specify the files to load and align using a wildcard.
    You should specify same number of residues for the target and other
    models.

    The second '_'-separated field of outfile selects the ensemble figure
    ('loopmodel', 'fixed' or 'backrub'); any other value skips the figure.

    By default all residues in the structure are aligned, cutoff is 2,
    number of cycles is 5, and align method is used.

    Example:
    pymol -cdq "run/$SCRIPTS/align_allfiles;align_allfiles <target name>,[none default arguments eg res_sel= i. 100-120,...]"
    """
    # float() rather than int(): fractional cutoffs are kept, and "2.5" typed
    # on the PyMOL command line no longer raises.
    cutoff = float(cutoff)
    cycles = int(cycles)

    cmd.load(target)

    # Selections used for the three RMSD flavours.
    all_sel = 'all & %s' % res_sel
    ca_sel = 'n. ca & %s' % res_sel
    bb_sel = 'n. n+c+ca+o and %s' % res_sel

    # Sorted list of files to align, without the target itself.
    file_list = sorted(f for f in glob.glob(files) if f != target)

    # Strips the directory part and the pdb/ent/brk extension to get the
    # object name.
    extension = re.compile(r'(^.*[\/]|\.(pdb|ent|brk))')
    target = extension.sub('', target)

    # Each entry: (object name, ca rmsd, bb rmsd, all-atom rmsd, file path)
    rmsd_list = []
    for path in file_list:
        obj_name = extension.sub('', path)
        cmd.load(path, obj_name)
        rms_ca = _superpose_rmsd(method, '%s & %s' % (obj_name, ca_sel),
                                 '%s & %s' % (target, ca_sel), cutoff, cycles)
        rms_bb = _superpose_rmsd(method, '%s & %s' % (obj_name, bb_sel),
                                 '%s & %s' % (target, bb_sel), cutoff, cycles)
        rms_all = _superpose_rmsd(method, '%s & %s' % (obj_name, all_sel),
                                  '%s & %s' % (target, all_sel), cutoff,
                                  cycles)
        rmsd_list.append((obj_name, rms_ca, rms_bb, rms_all, path))
        cmd.delete(obj_name)

    # Sort by c-alpha rmsd (index 1); use index 2 for bb or 3 for all atom.
    rmsd_list.sort(key=lambda entry: entry[1])

    # Write one row per model, pairing its rmsd values with the energies
    # read from that model's own file.
    outfp = outfile
    row_format = '%s,%6.2f,%6.2f,%6.2f,%6.2f,%6.2f,%6.2f,%6.2f,%6.2f\n'
    with open(outfp + '_score_vs_rmsd.csv', 'w') as out:
        out.write('model,score,ca,bb,all,fa_atr,fa_rep,fa_sol,fa_elec\n')
        for obj_name, rms_ca, rms_bb, rms_all, path in rmsd_list:
            energies = _read_pose_energies(path)
            if energies is None:
                print('no pose energies found in %s; row skipped' % path)
                continue
            score, fa_atr, fa_rep, fa_sol, fa_elec = energies
            row = row_format % (path, score, rms_ca, rms_bb, rms_all, fa_atr,
                                fa_rep, fa_sol, fa_elec)
            out.write(row)
            print(row)

    # generate ensemble figure
    name_fields = outfp.split('_')
    protocol = name_fields[1] if len(name_fields) > 1 else ''
    cmd.bg_color(color='white')
    cmd.hide("all")
    cmd.show("cartoon")
    cmd.set("antialias", 1)
    cmd.set("ray_trace_mode", 0)
    cmd.set("depth_cue", 0)
    cmd.set("ray_trace_fog", 0)
    cmd.set("stick_radius", 0.1)
    cmd.set("cartoon_side_chain_helper", 1)
    cmd.set("cartoon_flat_sheets", 1)
    # cmd.set("cartoon_transparency",0.8)
    cmd.set("ray_shadow", 0)
    if protocol == 'loopmodel':
        cmd.rotate("x", 70)
        cmd.zoom("resi 143-163")
        cmd.rotate("z", 20)
        cmd.rotate("y", 45)
        cmd.move("y", 8)
        cmd.move("x", 3)
        cmd.save(outfp + 'ensemble.pse')
        cmd.delete("all")
    elif protocol in ('fixed', 'backrub'):
        util.cbab("chain h")
        util.cbac("chain l")
        util.cbam("chain g")
        cmd.turn("y", -140)
        cmd.turn("x", -10)
        cmd.turn("z", 10)
        cmd.move("y", -5)
        cmd.move("z", 30)
        cmd.png(outfp + '_ensemble.png', 2400, 2400, dpi=300, ray=1)
        cmd.save(outfp + 'ensemble.pse')
        cmd.delete("all")

    # generate score vs rmsd plot
    scorevsrmsd(outfp + '_score_vs_rmsd.csv', name=outfp)
def color_by_mutation(obj1, obj2, waters=0, labels=0):
    '''
DESCRIPTION

    Creates an alignment of two proteins and superimposes them.
    Aligned residues that are different in the two (i.e. mutations) are
    highlighted and colored according to their difference in the BLOSUM90
    matrix. Is meant to be used for similar proteins, e.g. close homologs or
    point mutants, to visualize their differences.

USAGE

    color_by_mutation selection1, selection2 [,waters [,labels ]]

ARGUMENTS

    obj1: object or selection

    obj2: object or selection

    waters: bool (0 or 1). If 1, waters are included in the view, colored
            differently for the both input structures.
            default = 0

    labels: bool (0 or 1). If 1, the possibly mutated sidechains are labeled
            by their chain, name and id
            default = 0

EXAMPLE

    color_by_mutation protein1, protein2

SEE ALSO

    super
    '''
    from pymol import stored, CmdException

    for sele in (obj1, obj2):
        if cmd.count_atoms(sele) == 0:
            print('%s is empty' % sele)
            return
    waters = int(waters)
    labels = int(labels)

    # align the two proteins
    aln = '__aln'

    # first, an alignment with 0 cycles (no atoms are rejected, which
    # maximizes the number of aligned residues). This is essentially done to
    # get a sequence alignment
    cmd.super(obj1, obj2, object=aln, cycles=0)
    # then superimpose with the default parameters to get a slightly better
    # superimposition, i.e. the best structural alignment
    cmd.super(obj1, obj2)

    stored.resn1, stored.resn2 = [], []
    stored.resi1, stored.resi2 = [], []
    stored.chain1, stored.chain2 = [], []

    # store residue ids, residue names and chains of aligned residues
    cmd.iterate(obj1 + ' and name CA and ' + aln, 'stored.resn1.append(resn)')
    cmd.iterate(obj2 + ' and name CA and ' + aln, 'stored.resn2.append(resn)')
    cmd.iterate(obj1 + ' and name CA and ' + aln, 'stored.resi1.append(resi)')
    cmd.iterate(obj2 + ' and name CA and ' + aln, 'stored.resi2.append(resi)')
    cmd.iterate(obj1 + ' and name CA and ' + aln, 'stored.chain1.append(chain)')
    cmd.iterate(obj2 + ' and name CA and ' + aln, 'stored.chain2.append(chain)')

    # Selection terms, joined with ' or ' below.  The leading 'none' keeps
    # the non-mutant selection valid when every aligned pair differs.
    mutant_terms = []
    non_mutant_terms = ['none']
    colors = []
    pair_term = '((%s and resi %s and chain %s) or (%s and resi %s and chain %s))'

    # loop over the aligned residues
    for n1, n2, i1, i2, c1, c2 in zip(stored.resn1, stored.resn2,
                                      stored.resi1, stored.resi2,
                                      stored.chain1, stored.chain2):
        # take care of 'empty' chain names
        if c1 == '':
            c1 = '""'
        if c2 == '':
            c2 = '""'
        term = pair_term % (obj1, i1, c1, obj2, i2, c2)
        if n1 == n2:
            non_mutant_terms.append(term)
        else:
            mutant_terms.append(term)
            # colour name reflecting the BLOSUM similarity of the two residues
            c = getBlosum90ColorName(n1, n2)
            colors.append(
                (c, '%s and resi %s and chain %s and elem C' % (obj2, i2, c2)))

    if not mutant_terms:
        print(' Error: No mutations found')
        # Do not leave the temporary alignment object behind.
        cmd.delete(aln)
        raise CmdException

    # create selections
    cmd.select('mutations', ' or '.join(mutant_terms))
    cmd.select('non_mutations', ' or '.join(non_mutant_terms))
    cmd.select(
        'not_aligned',
        '(%s or %s) and not mutations and not non_mutations' % (obj1, obj2))

    # create the view and coloring
    cmd.hide('everything', '%s or %s' % (obj1, obj2))
    cmd.show('cartoon', '%s or %s' % (obj1, obj2))
    cmd.show(
        'lines',
        '(%s or %s) and ((non_mutations or not_aligned) and not name c+o+n)' %
        (obj1, obj2))
    cmd.show('sticks',
             '(%s or %s) and mutations and not name c+o+n' % (obj1, obj2))
    cmd.color('gray', 'elem C and not_aligned')
    cmd.color('white', 'elem C and non_mutations')
    cmd.color('blue', 'elem C and mutations and %s' % obj1)
    for (col, sel) in colors:
        cmd.color(col, sel)

    cmd.hide('everything', '(hydro) and (%s or %s)' % (obj1, obj2))
    cmd.center('%s or %s' % (obj1, obj2))
    if labels:
        cmd.label('mutations and name CA', '"(%s-%s-%s)"%(chain, resi, resn)')
    if waters:
        cmd.set('sphere_scale', '0.1')
        cmd.show('spheres', 'resn HOH and (%s or %s)' % (obj1, obj2))
        cmd.color('red', 'resn HOH and %s' % obj1)
        cmd.color('salmon', 'resn HOH and %s' % obj2)
    print('''
             Mutations are highlighted in blue and red.
             All mutated sidechains of %s are colored blue, the corresponding ones from %s are
             colored on a spectrum from blue to red according to how similar the two amino acids are
             (as measured by the BLOSUM90 substitution matrix).
             Aligned regions without mutations are colored white.
             Regions not used for the alignment are gray.
             NOTE: There could be mutations in the gray regions that were not detected.''' % (obj1, obj2))
    cmd.delete(aln)
    cmd.deselect()
from pymol import cmd

# Superpose every loaded object onto the first entry of the object list,
# restricting both sides of the fit to the selection '(ss s)'.
objs = cmd.get_object_list()
strands = ' and (ss s)'
for mobile in objs[1:]:
    cmd.super(mobile + strands, objs[0] + strands)