def test_split_chains():
    """Check psico.editing.split_chains with default and prefixed naming."""
    cmd.reinitialize()
    # Two three-residue peptides on different chains, merged into object m3.
    for sequence, obj_name, chain_id in (('ACD', 'm1', 'A'), ('EFG', 'm2', 'B')):
        cmd.fab(sequence, obj_name, chain=chain_id)
    cmd.create('m3', 'm1 m2')

    # Default naming: <object>_<chain>
    psico.editing.split_chains('m3')
    for chain_id in ('A', 'B'):
        assert cmd.get_chains('m3_' + chain_id) == [chain_id]

    # Prefixed naming: <prefix> followed by a zero-padded running number
    psico.editing.split_chains('m3', 'foo_')
    assert cmd.get_chains('foo_0001') == ['A']
def test_assembly(self):
    """Load the same CIF file with and without the ``assembly`` setting."""
    cif_name = '4m4b-minimal-w-assembly.cif'

    # Default settings: one state, chains A and B.
    cmd.load(self.datafile(cif_name))
    self.assertEqual(cmd.count_states(), 1)
    self.assertEqual(cmd.get_chains(), ['A', 'B'])

    # With assembly "1" selected: two states, only chain B.
    cmd.delete('*')
    cmd.set('assembly', '1')
    cmd.load(self.datafile(cif_name))
    self.assertEqual(cmd.count_states(), 2)
    self.assertEqual(cmd.get_chains(), ['B'])
def split_chains(selection='(all)', prefix=None):
    '''
DESCRIPTION

    Create a single object for each chain in selection. Every source
    object is disabled after its chains have been copied.

ARGUMENTS

    selection = string: atom selection {default: (all)}

    prefix = string: if given, new objects are named <prefix>NNNN with a
    running four-digit counter, otherwise <object>_<chain>

SEE ALSO

    split_states, http://pymolwiki.org/index.php/Split_object
    '''
    n_created = 0
    for obj in cmd.get_object_list('(' + selection + ')'):
        obj_sele = '(%s) and model %s' % (selection, obj)
        for chain_id in cmd.get_chains(obj_sele):
            # An empty chain identifier has to be written as '' in a selection
            token = "''" if chain_id == '' else chain_id
            n_created += 1
            if prefix:
                new_name = '%s%04d' % (prefix, n_created)
            else:
                new_name = '%s_%s' % (obj, token)
            cmd.create(new_name, obj_sele + ' and chain %s' % token)
        cmd.disable(obj)
def uniprot_auto(pdb_id, selection='', withss=0, quiet=1):
    '''
DESCRIPTION

    Like "uniprot_features" but with automatic fetching of UniProtKB
    accession and sequence mapping for given pdb_id from
    http://www.bioinf.org.uk/pdbsws/

ARGUMENTS

    pdb_id = string: PDB accession ID

    selection = string: atom selection {default: <pdb_id>, will be fetched if
    no such object is loaded}

    withss = 0/1: update secondary structure {default: 0}

RAISES

    CmdException on an invalid pdb_id or if downloading/parsing the
    mapping fails.
    '''
    # Works on both Python 3 and Python 2
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    if len(pdb_id) != 4 or not pdb_id[0].isdigit():
        print(' Error: invalid pdb_id: %s' % (pdb_id,))
        raise CmdException

    if not selection:
        selection = pdb_id
        if pdb_id not in cmd.get_names('all'):
            # "async" is a reserved word since Python 3.7, so it cannot be
            # written as a literal keyword argument any more.
            cmd.fetch(pdb_id, **{'async': 0})

    sele_chains = cmd.get_chains(selection)

    # chain -> (accession, residue mapper)
    mappings = {}
    pdb_id = pdb_id.lower()
    url = 'http://www.bioinf.org.uk/cgi-bin/pdbsws/query.pl?plain=1&qtype=pdb&all=yes&id=' + pdb_id

    try:
        response = urlopen(url)
        try:
            for line in response:
                # Python 3 yields bytes; the fixed-column parsing needs text
                if not isinstance(line, str):
                    line = line.decode('ascii', 'replace')
                if not line.startswith(pdb_id):
                    continue
                chain = line[5]
                resno = line[20:25].strip()
                acc = line[27:36].strip()
                number = line[40:50].strip()
                if not acc or not number:
                    continue
                if chain not in mappings:
                    mappings[chain] = acc, resid_mapper()
                if mappings[chain][0] != acc and chain in sele_chains:
                    raise ValueError('multiple accessions per chain not supported')
                mappings[chain][1][int(number)] = resno
        finally:
            response.close()
    except Exception as e:
        print(' Error: %s' % (e,))
        raise CmdException

    for chain, (acc, sm) in mappings.items():
        uniprot_features(acc, '(%s) and chain %s' % (selection, chain),
                         withss, 'feature_' + chain + '_', sm, quiet)
def prepareprot_scaffold(preload, presave, receptor, postload, postsave, algchoice):
    """Load a receptor, interactively prune chains, save it and run prepareprot.sh.

    The structure is loaded from ``preload + receptor + postload``. The user
    is asked which chains to keep; all other chains are removed. On a valid
    answer, residues named HOH are removed, the result is saved to
    ``presave + receptor + postsave`` and ``./prepareprot.sh`` is called with
    ``receptor`` and ``algchoice``. On an invalid answer only a hint is
    printed and nothing is saved.
    """
    cmd.load(preload + receptor + postload)
    all_chains = cmd.get_chains(receptor)
    print(
        f'Which chains would you like to keep for protein {receptor}. It has following chains: ',
        all_chains)
    selectedchain = input(
        "Type the chain names seperated by space, \nif you want all chains, type 'all': "
    )

    known = set(all_chains)
    # split() without argument tolerates repeated, leading and trailing blanks
    requested = set(selectedchain.split())

    canproceed = False
    if requested and requested.issubset(known):
        for chain in known - requested:
            cmd.remove('chain ' + chain)
            print(f'Chain {chain} removed.')
        canproceed = True
    elif requested == {'all'}:
        print('All chains selected.')
        canproceed = True
    else:
        print(
            'Wrong chain selection. Type it like "A B 6" if you have three chains named A, B and 6'
        )

    if canproceed:
        cmd.remove('resn HOH')
        cmd.save(presave + receptor + postsave)
        subprocess.call(
            shlex.split(f'./prepareprot.sh {receptor} {algchoice}'))
def testCifMissing(self):
    """Round-trip the literal values "?" and "." through CIF export and import."""
    N = 7
    cmd.fragment('gly', 'm1')
    cmd.alter('all', '(chain, segi, resv, alt) = ("?", ".", 5, "")')
    cif_text = cmd.get_str('cif')

    # chain "?" and segi "." must be written as quoted strings
    for mark in ('?', '.'):
        self.assertTrue("'%s'" % mark in cif_text or '"%s"' % mark in cif_text)
    # unquoted placeholders, e.g. pdbx_PDB_ins_code and label_alt_id
    for mark in ('?', '.'):
        self.assertTrue(' %s ' % mark in cif_text)

    cmd.delete('*')
    cmd.set('cif_keepinmemory')
    cmd.load(cif_text, 'm2', format='cifstr')
    self.assertEqual(['?'], cmd.get_chains())
    # segi kept, no alt, no insertion code
    for atom_sele in ('segi .', 'alt ""', 'resi 5'):
        self.assertEqual(cmd.count_atoms(atom_sele), N)

    from pymol.querying import cif_get_array
    expectations = (
        (("_atom_site.type_symbol",), list('NCCOHHH')),
        (("_atom_site.id", "i"), list(range(1, N + 1))),
        (("_atom_site.auth_asym_id",), ['?'] * N),
        (("_atom_site.label_asym_id",), ['.'] * N),
        (("_atom_site.pdbx_pdb_ins_code",), [None] * N),
        (("_atom_site.label_alt_id",), [None] * N),
    )
    for key_args, wanted in expectations:
        self.assertEqual(cif_get_array("m2", *key_args), wanted)
def testFetchLocal(self):
    """Fetch entries from a local file:// mirror and check the object names."""
    try:
        import urllib.parse as urlparse
    except ImportError:
        import urlparse

    with testing.mkdtemp() as fetch_path:
        expected = []
        cmd.set('fetch_path', fetch_path)
        mirror_url = urlparse.urlunsplit(
            ['file', '', self.datafile('pdb.mirror'), '', ''])
        cmd.set('fetch_host', mirror_url)

        # whole entry
        cmd.fetch('1avy', type='pdb')
        expected.append('1avy')
        self.assertItemsEqual(cmd.get_names(), expected)

        # entry code with chain suffix
        cmd.fetch('1avyB', type='pdb')
        expected.append('1avyB')
        self.assertItemsEqual(cmd.get_names(), expected)
        self.assertEqual(cmd.get_chains('1avyB'), ['B'])

        # multiplexed fetch: one object per state
        cmd.fetch('1aq5', type='pdb', multiplex=1)
        expected.extend('1aq5_%04d' % state for state in range(1, 21))
        self.assertItemsEqual(cmd.get_names(), expected)
def testFetchLocal(self):
    """Fetch entries from a local file:// mirror and check the object names."""
    try:
        import urllib.parse as urlparse
    except ImportError:
        import urlparse

    # PyMOL 1.8.6 adds full URLs, remove them (in place)
    import pymol
    host_paths = pymol.importing.hostPaths['pdb']
    host_paths[:] = [entry for entry in host_paths if '://' not in entry]

    with testing.mkdtemp() as fetch_path:
        expected = []
        cmd.set('fetch_path', fetch_path)
        mirror_url = urlparse.urlunsplit(
            ['file', '', self.datafile('pdb.mirror'), '', ''])
        cmd.set('fetch_host', mirror_url)

        # whole entry
        cmd.fetch('1avy', type='pdb')
        expected.append('1avy')
        self.assertItemsEqual(cmd.get_names(), expected)

        # entry code with chain suffix
        cmd.fetch('1avyB', type='pdb')
        expected.append('1avyB')
        self.assertItemsEqual(cmd.get_names(), expected)
        self.assertEqual(cmd.get_chains('1avyB'), ['B'])

        # multiplexed fetch: one object per state
        cmd.fetch('1aq5', type='pdb', multiplex=1)
        expected.extend('1aq5_%04d' % state for state in range(1, 21))
        self.assertItemsEqual(cmd.get_names(), expected)
def f_get_interface_center(complex_txt):
    """Locate the interface between the first two chains of a complex.

    The interface residues are selected with ``interfaceResidues``, split
    per chain into the selections "A_interface" and "B_interface", and for
    each side the CA atom closest to that side's centre pseudoatom is
    looked up with ``distancetoatom``.

    The two chains are the first two identifiers reported by
    ``cmd.get_chains`` (an IndexError is raised if there are fewer than
    two). They are no longer assumed to be named "A" and "B"; the selection
    and result names keep their historical A/B labels.

    Returns a dict with keys "CA1", "d_CA1", "CA2", "d_CA2" (closest CA and
    its distance for each side) and "int_res1", "int_res2" (resi strings of
    the interface CA atoms of each side).
    """
    chains = cmd.get_chains(complex_txt)
    first_chain, second_chain = chains[0], chains[1]

    interfaceResidues.interfaceResidues(complex_txt, cA=first_chain, cB=second_chain,
                                        cutoff=1, selName="Interface_res")
    cmd.select("A_interface", "Interface_res & chain " + first_chain)
    cmd.select("B_interface", "Interface_res & chain " + second_chain)

    # get residues
    stored.intA = []
    cmd.iterate("A_interface & name CA", "stored.intA.append(resi)")
    stored.intB = []
    cmd.iterate("B_interface & name CA", "stored.intB.append(resi)")

    # centre pseudoatoms of both interface halves
    center_of_mass.com("A_interface", object="A_center_pseudoatom")
    center_of_mass.com("B_interface", object="B_center_pseudoatom")
    # NOTE(review): the return values of get_com were never used; the calls
    # are kept in case they have side effects - confirm and drop if not.
    center_of_mass.get_com("A_interface")
    center_of_mass.get_com("B_interface")

    def closest_ca(origin, interface_sele):
        # Entry [2] of the distancetoatom result is used as the closest hit:
        # item 0 is split to get the atom label, item 4 is the distance.
        hits = distancetoatom.distancetoatom(origin=origin,
                                             selection=interface_sele + " and name CA",
                                             property_name="", cutoff=200)
        nearest = hits[2]
        return nearest[0].split("/")[4].split("`")[1], nearest[4]

    center_iface_CA1, d_center_iface_CA1 = closest_ca("A_center_pseudoatom", "A_interface")
    center_iface_CA2, d_center_iface_CA2 = closest_ca("B_center_pseudoatom", "B_interface")

    return {"CA1": center_iface_CA1,
            "d_CA1": d_center_iface_CA1,
            "CA2": center_iface_CA2,
            "d_CA2": d_center_iface_CA2,
            "int_res1": stored.intA,
            "int_res2": stored.intB}
def zero_residues(sel1, offset=0, chains=0):
    """
    Renumber residues so that the first polymer CA residue of each object
    in ``sel1`` gets the number ``offset``.

    sel1   -- atom selection
    offset -- residue number the first residue should receive (default 0)
    chains -- if non-zero, renumber every chain separately
    """
    offset = int(offset)

    # filled in by cmd.iterate below
    stored.first = None

    # one sub-selection per object in the selection ...
    targets = []
    for obj in cmd.get_object_list('(' + sel1 + ')'):
        targets.append('(model %s and (%s))' % (obj, sel1))

    # ... or one per object and chain
    if int(chains):
        per_chain = []
        for target in targets:
            for chain_id in cmd.get_chains(target):
                per_chain.append('(%s and chain %s)' % (target, chain_id))
        targets = per_chain

    for target in targets:
        # residue number of the first polymer CA atom
        found = cmd.iterate("first %s and polymer and n. CA" % target,
                            "stored.first=resv")
        # nothing to do without a match or if the numbering already fits
        if found and stored.first != offset:
            shift = str(int(stored.first) - offset)
            cmd.alter("%s" % target, "resi=str(int(resi)-%s)" % shift)

    # update pymol
    cmd.rebuild()
def getChains():
    '''Get the chains contained in the currently loaded PDB file(s).

    Returns a flat list with the chain identifiers of every name reported
    by cmd.get_names(), in that order.
    '''
    return [ch for name in cmd.get_names() for ch in cmd.get_chains(name)]
def testFetchLocal(self):
    """Fetch entries from a local file:// mirror and check the object names."""
    try:
        import urllib.parse as urlparse
    except ImportError:
        import urlparse

    # PyMOL 1.8.6 adds full URLs, remove them (in place)
    import pymol
    host_paths = pymol.importing.hostPaths['pdb']
    host_paths[:] = [entry for entry in host_paths if '://' not in entry]

    with testing.mkdtemp() as fetch_path:
        seen = []
        cmd.set('fetch_path', fetch_path)
        local_mirror = urlparse.urlunsplit(
            ['file', '', self.datafile('pdb.mirror'), '', ''])
        cmd.set('fetch_host', local_mirror)

        # whole entry
        cmd.fetch('1avy', type='pdb')
        seen += ['1avy']
        self.assertItemsEqual(cmd.get_names(), seen)

        # entry code with chain suffix
        cmd.fetch('1avyB', type='pdb')
        seen += ['1avyB']
        self.assertItemsEqual(cmd.get_names(), seen)
        self.assertEqual(cmd.get_chains('1avyB'), ['B'])

        # multiplexed fetch: one object per state
        cmd.fetch('1aq5', type='pdb', multiplex=1)
        seen += ['1aq5_%04d' % number for number in range(1, 21)]
        self.assertItemsEqual(cmd.get_names(), seen)
def chain_contact():
    """Collect chain-chain contacts (within 5 Angstrom) in the selection
    "chain_contact", colour it and return the list of "Chain-<id>" names.

    Returns False (after showing a notice) if fewer than two chains exist.
    """
    def select_contacts(pivot, taken, names):
        # Pair names[pivot + 1] with every partner index that is neither
        # the pivot's successor nor listed in "taken".
        partner = 0
        bound = pivot + 1
        while len(names) > bound and (26 - partner) >= 0:
            other = names[partner]
            target = names[pivot + 1]
            for near, ref in ((other, target), (target, other)):
                cmd.select('chain_contact', '%s w. 5 of %s' % (near, ref),
                           enable=0, quiet=1, merge=1)
            partner += 1
            bound += 1
            while partner == (pivot + 1) or partner in taken:
                partner += 1

    glb.update()
    cmd.hide('everything')
    cmd.show('mesh', 'all')
    cmd.color('gray40', 'all')
    # result was never used in the original either; call kept as is
    cmd.get_names('all')

    names = ['Chain-' + chain_id for chain_id in cmd.get_chains(quiet=1)]
    if len(names) < 2:
        showinfo('Notice', 'There needs to be two or more chains to run this functions.')
        return False

    taken = []
    for pivot in range(min(len(names) - 1, 26)):
        taken.append(pivot + 1)
        select_contacts(pivot, taken, names)

    glb.procolor('chain_contact', 'mesh', 'cpk', None)
    cmd.delete('chain_contact')
    return names
def getObgSeqLen(obj):
    """Collect the residue name of every atom, grouped by chain.

    Only the first object matching ``obj`` is used. Returns a tuple
    (resList, resNum): one list of per-atom residue names for each chain,
    and the total number of entries, which is also printed.
    """
    first_obj = cmd.get_names('objects', 1, obj)[0]
    chain_ids = cmd.get_chains('%s and (%s)' % (first_obj, "all"))

    resList = []
    for chain_id in chain_ids:
        chain_model = cmd.get_model(first_obj + " and chain " + chain_id)
        resList.append([atom.resn for atom in chain_model.atom])

    resNum = sum(len(per_chain) for per_chain in resList)
    print("Number of Atoms in the model = ", resNum)
    return resList, resNum
def testAdvanced(self):
    """Build two peptides with explicit chain and start residue in one fab call."""
    cmd.fab('A/123/ ADC B/234/ AFCD')
    self.assertEqual(cmd.get_chains(), ['A', 'B'])

    # the last atom of chain B belongs to residue 237 (ASP)
    expected_last = (237, 'ASP')
    cmd.iterate('last chain B', 'stored.v = (resv, resn)')
    self.assertEqual(stored.v, expected_last)
def testLoad_pqr_various(self):
    """Load every *.pqr data file and check its extent and chain identifiers."""
    import glob

    # expected extent, keyed by the number of atoms in the file
    extent_expect = {
        19: [[1999.32, 2998.77, -901.70], [2008.4, 3006.35, -898.02]],
        36: [[1998.629, 2998.033, -902.509], [2008.765, 3006.347, -898.022]],
    }

    pattern = os.path.join(self.datafile('pqr'), '*.pqr')
    for filename in glob.glob(pattern):
        cmd.load(filename)
        n_atom = cmd.count_atoms()
        extent = cmd.get_extent()
        self.assertArrayEqual(extent, extent_expect[n_atom], delta=1e-2, msg=filename)

        # files with "chain" in their name carry chain A, the others none
        has_chain = 'chain' in os.path.basename(filename)
        self.assertEqual(cmd.get_chains(), ['A'] if has_chain else [''])
        cmd.delete('*')
def get_sequence(obj):
    """Return the protein sequence of all chains of ``obj`` as one string.

    For every chain the FASTA text from cmd.get_fastastr is split on
    whitespace, its first token (the header) is dropped and the remaining
    tokens are concatenated.
    """
    pieces = []
    for chain in cmd.get_chains(obj):
        fasta = cmd.get_fastastr(f'{obj} and chain {chain} and polymer.protein')
        pieces.extend(fasta.split()[1:])
    return ''.join(pieces)
def setchains(sele="all"):
    """Give every (object, chain) pair in ``sele`` its own chain identifier.

    Identifiers are handed out in order from A-Z followed by 0-9 and wrap
    around after 36 chains. Returns None.

    Fixes over the previous version: the object name (not an enumerate
    tuple) is used in the selection, ``sele`` is honoured, and chains are
    looked up per object.
    """
    aa = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

    # Freeze every chain into a temporary named selection first. Renaming
    # while still selecting by chain id would merge chains as soon as a new
    # identifier equals one that has not been processed yet.
    pending = []
    for obj in cmd.get_object_list("(" + sele + ")"):
        for c in cmd.get_chains("model %s and (%s)" % (obj, sele)):
            tmp_name = cmd.get_unused_name("_setchains_tmp")
            cmd.select(tmp_name,
                       'model %s and chain "%s" and (%s)' % (obj, c, sele), 0)
            pending.append(tmp_name)

    for i, tmp_name in enumerate(pending):
        cmd.alter(tmp_name, "chain='%s'" % aa[i % len(aa)])
        cmd.delete(tmp_name)
    return
def test_gen(max_num_mut=6, target_num_mut=None, flex_only=False, limit_seqspace=False):
    """Generate all reasonable designs from the current pdb

    Args:
        max_num_mut (int): The maximum number of mutations to allow
        target_num_mut (int): Only return mutable sets of this size
        flex_only (boolean): Passed through to print_design
        limit_seqspace (boolean): Passed through to print_design

    Raises:
        CmdException: Error in a PyMol command
    """
    # Generate all interfaces
    # Note - this command seems to work well for picking only
    # sidechain-sidechain contacts
    interface(None, None, 3, 1)
    try:
        selection_names = cmd.get_names("selections", 0, "all")
    except:
        raise CmdException
    inter_list = [name for name in selection_names if name.startswith("int")]

    # Find potential mutable residues on both sides of every interface
    mut_list = []
    for inter in inter_list:
        chains = cmd.get_chains(inter)
        for chain_id in (chains[0], chains[1]):
            side = inter + ' and chain ' + chain_id
            print(side)
            mut_list.extend(gen_mut(side, max_num_mut, target_num_mut))
    print('Generated ' + str(len(mut_list)) + ' sets of mutations')

    # For each mutable residue set, generate a flexible shell and print design
    for mut in mut_list:
        # Select muts
        obj_name = cmd.get_names('objects', 0, 'all')[0]
        residue_ids = '+'.join(str(res.res_seq) + res.i_code for res in mut)
        chain = next(iter(mut)).chain_id
        cmd.select("mut", "br. chain " + chain + " and resi " + residue_ids)
        # gen_shell
        gen_shell(1, 0, "mut", 5)
        # print output
        name = '_'.join([obj_name[:4], chain]) + ".cfs"
        print_design("mut", "flex", name, flex_only, limit_seqspace)
def load(type, id):
    """Load one dataset entry into PyMOL and build a training example.

    ``type`` selects the dataset kind ("cif", "xyz" or "rxn") and ``id``
    the entry inside ``./datasets/<type>``. For "cif" the file is fetched
    if it is not on disk; for "rxn" the products form the target and the
    reactants the model input. Returns the result of ``make_example``.

    String comparisons use ``==``; the previous ``is`` comparisons only
    worked by accident of string interning.
    """
    # we clear pymol, make filename and path
    cmd.delete('all')
    file_name = f'{id}.{type}' if type == 'cif' else id
    dataset_path = os.path.join('.', 'datasets', type)
    path = os.path.join(dataset_path, file_name)

    # we load the target
    print(f'load {id} {path}')
    if type == "cif":
        if os.path.exists(path):
            cmd.load(path)
        else:
            cmd.fetch(id, path=dataset_path)
    elif type == "xyz":
        cmd.load(path)
    elif type == "rxn":
        reactants, products = get_reactants_products(path)
        print(f"{str(reactants)} ---> {str(products)}")
        for product in products:
            cmd.load(os.path.join('.', 'datasets', 'mol', product))

    # make target
    clean_pymol()
    model = cmd.get_model('all', 1)
    target_positions = get_positions(model)
    quantum_target = get_quantum_target(path) if type == "xyz" else None
    is_rxn = type == "rxn"
    target_features = get_features(model) if is_rxn else None
    target_masses = get_masses(model) if is_rxn else None
    target_numbers = get_numbers(model) if is_rxn else None

    if is_rxn:
        # replace the products by the reactants for the model input
        cmd.delete('all')
        for reactant in reactants:
            cmd.load(os.path.join('.', 'datasets', 'mol', reactant))
        clean_pymol()
        model = cmd.get_model('all', 1)

    # make the model inputs
    # NOTE(review): `model` is not re-read after undock/unfold, so the
    # positions below are taken from before the perturbation - confirm
    # whether that is intended.
    if type == 'cif':
        chains = cmd.get_chains('all')
        if random.random() < P_UNDOCK:
            undock(chains, type)
        if random.random() < P_UNFOLD:
            unfold(chains)
    if is_rxn:
        names = cmd.get_names('all')
        undock(names, type)

    positions = get_positions(model)
    features = get_features(model)
    masses = get_masses(model)
    numbers = get_numbers(model)
    return make_example(type, id, target_positions, positions, features,
                        masses, numbers, quantum_target, target_features,
                        target_masses, target_numbers)
def test_update_identifiers():
    """Copy segi and chain from a backbone-only object onto a full one."""
    cmd.reinitialize()
    for obj_name in ('m1', 'm2'):
        cmd.fab('ACD', obj_name)
    cmd.remove('m1 and not backbone')
    cmd.alter('m1', '(segi, chain) = ("Segi", "Chain")')

    psico.editing.update_identifiers('m2', 'm1', identifiers='segi chain')

    assert cmd.get_chains('m2') == ['Chain']
    # the iterate expression refers to the list by this exact name
    my_values = []
    cmd.iterate('m2', 'my_values.append(segi)', space={'my_values': my_values})
    assert set(my_values) == set(["Segi"])
def split_chains(obj=None, color=1):
    """Replace ``obj`` by one object per chain, named <obj>_<chain>.

    obj   -- object to split; defaults to the first object in the list
    color -- if int(color) == 1, the carbons of the new objects are
             coloured with util.color_objs

    Each new object is first coloured 'atomic'. The view is centred on the
    source object, which is then deleted.
    """
    if obj is None:
        obj = cmd.get_object_list()[0]

    for chain in cmd.get_chains(obj):
        piece = obj + "_" + chain
        cmd.create(piece, f"{obj} and chain {chain}")
        cmd.color('atomic', piece)

    cmd.center(obj)
    cmd.delete(obj)

    if int(color) != 1:
        return
    util.color_objs(f"elem c and {obj}_*")
def main():
    """Write phi/psi angles of peptide residues 2-8 to one CSV file per position.

    For every key returned by ``read_rmsd`` the file
    ``<args.dir>/<key>_reordered.pdb`` is loaded and the backbone dihedrals
    of residues 2 to 8 of the second chain are measured. Only structures
    for which all seven phi and psi values could be computed are written,
    one row per residue into ``phi_psi_<resi>.csv``.
    """
    args = parse_args()
    pep_pdbs = read_rmsd(args)

    # (re)create the seven output files with a header line
    for resi in range(2, 9):
        with open('phi_psi_' + str(resi) + '.csv', 'w') as handle:
            handle.write("pdb,chain,pep_seq,index,phi,psi\n")

    for key in pep_pdbs:
        phi = []
        psi = []
        model_name = key + '_reordered'
        targetfile = args.dir + '/' + key + '_reordered.pdb'
        cmd.load(targetfile)
        chains = cmd.get_chains(model_name)
        try:
            peptide = chains[1]
            for resi in range(2, 9):
                here = peptide + "/" + str(resi)
                phi.append(
                    cmd.get_dihedral(peptide + "/" + str(resi - 1) + "/c,",
                                     here + "/n,",
                                     here + "/ca,",
                                     here + "/c"))
                psi.append(
                    cmd.get_dihedral(here + "/N,",
                                     here + "/ca,",
                                     here + "/c,",
                                     peptide + "/" + str(resi + 1) + "/n"))
        except Exception:
            # best effort: structures with missing chains/atoms are skipped
            print("Found exception, ignoring")

        if len(phi) == 7 and len(psi) == 7:
            for resi, (phi_value, psi_value) in enumerate(zip(phi, psi), 2):
                angles = str(resi) + ',' + str(phi_value) + ',' + str(psi_value)
                print(pep_pdbs[key] + ',' + chains[1] + ',' + angles)
                # "with" closes the file even if the write fails
                with open('phi_psi_' + str(resi) + '.csv', 'a') as handle:
                    handle.write(key + ',' + chains[1] + ',' + pep_pdbs[key] +
                                 ',' + angles + '\n')
        pymol.cmd.do("delete all")
def intraDistanceMap(self, pymolObject1):
    """Compute distance maps between the chains of one object.

    For every chain pair a "DistanceMap" plot, two "AccessibilityPlot"
    entries and - if the distance matrix is square - a "DistancePlot" of
    the diagonal are appended to ``stored.plots``. With
    ``self.writeToFile`` set, the data are also saved as text files. In
    homo-oligomer mode only the first chain is compared against all
    chains.
    """
    stored.residueList = []
    chains = cmd.get_chains(pymolObject1)

    # for homoOligomer mode: only the first chain on the first axis.
    # A one-element list (not the bare string) keeps multi-character chain
    # identifiers intact and copes with an empty chain list.
    chains1 = chains
    if self.homoOligomerMode == True:
        chains1 = chains[:1]

    # iterate over chains
    for chain_1 in chains1:
        for chain_2 in chains:
            plot_id = uuid.uuid1()
            try:
                distances, x, y, diagonal, accObj1, accObj2, limits = self.calculateDistanceMap(
                    pymolObject1, chain_1, pymolObject1, chain_2)
            except Exception:
                print("Could not calculate distance map.")
                break

            # plot distance map
            xlim = [limits["minChain_1"], limits["maxChain_1"]]
            ylim = [limits["minChain_2"], limits["maxChain_2"]]
            z = numpy.transpose(distances)
            zm = ma.masked_where(numpy.isnan(z), z)
            fileName = "%s_%s-%s_distanceMatrix" % (pymolObject1, chain_1, chain_2)
            plotDictionary = self.makeGraphDataDictionary(
                plot_id, fileName, "DistanceMap", "Number of Residue",
                "Number of Residue", x, y, zm, xlim, ylim)
            stored.plots.append(plotDictionary)
            if self.writeToFile:
                numpy.savetxt(fileName, distances)

            # plot accessibility for chain 1, then chain 2
            for chain, accessibility, lim in ((chain_1, accObj1, xlim),
                                              (chain_2, accObj2, ylim)):
                fileName = "%s_%s_accessibilty" % (pymolObject1, chain)
                x = numpy.linspace(1, accessibility.shape[0], num=accessibility.shape[0])
                if self.writeToFile:
                    numpy.savetxt("%s_%s_accessibility.txt" % (pymolObject1, chain),
                                  numpy.column_stack((x, accessibility)))
                plotDictionary = self.makeGraphDataDictionary(
                    plot_id, fileName, "AccessibilityPlot", "Number of Residue",
                    "Relative Accessibility", x, accessibility, 0, lim, 0)
                stored.plots.append(plotDictionary)

            # diagonal, only if both chains have the same number of residues
            if distances.shape[0] == distances.shape[1]:
                fileName = "%s_%s-%s_diagonal" % (pymolObject1, chain_1, chain_2)
                x = numpy.linspace(1, diagonal.shape[0], num=diagonal.shape[0])
                if self.writeToFile:
                    numpy.savetxt(fileName + ".txt", numpy.column_stack((x, diagonal)))
                plotDictionary = self.makeGraphDataDictionary(
                    plot_id, fileName, "DistancePlot", "Number of Residue",
                    "Distance (Angstrom)", x, diagonal, 0, xlim, ylim)
                stored.plots.append(plotDictionary)
def _get_select_list():
    '''
    Get either a list of object names, or a list of chain selections.

    Names containing "_cluster_" are ignored. If exactly one object is
    left and it has more than one chain, one "<object> & chain <id>"
    selection per chain is returned instead of the object name.
    '''
    candidates = []
    for name in cmd.get_names('all', 1):
        if '_cluster_' in name:
            continue
        candidates.append(name)

    if len(candidates) != 1:
        return candidates

    # single object: try chain selections
    only_object = candidates[0]
    chains = cmd.get_chains(only_object)
    if len(chains) <= 1:
        return candidates
    return ['{} & chain {}'.format(only_object, chain) for chain in chains]
def zero_residues_sub(sel1, start=0, end=0, offset=0, chains=0):
    """
DESCRIPTION

    Renumbers the residues so that the given residue range starts at zero,
    or offset

USAGE

    zero_residues_sub selection, start, end [, offset [, chains ]]

EXAMPLES

    zero_residues_sub protName, 0, 10            # first residue is 0
    zero_residues_sub protName, 0, 10, 5         # first residue is 5
    zero_residues_sub protName, 0, 10, chains=1  # each chain starts at 0

NOTES

    start and end may be given as integers or strings. Only selections
    that contain a polymer CA atom are renumbered.
    """
    offset = int(offset)
    shift = str(int(start) - offset)
    # "%s" formatting accepts both int and str; the former string
    # concatenation raised TypeError for the integer defaults.
    resi_range = "%s-%s" % (start, end)

    # variable to store the offset
    stored.first = None

    # get the names of the proteins in the selection
    names = [
        '(model %s and (%s))' % (p, sel1)
        for p in cmd.get_object_list('(' + sel1 + ')')
    ]
    if int(chains):
        names = [
            '(%s and chain %s)' % (p, chain)
            for p in names
            for chain in cmd.get_chains(p)
        ]

    for p in names:
        # skip selections without any polymer CA atom
        ok = cmd.iterate("first %s and polymer and n. CA" % p, "stored.first=resv")
        if not ok:
            continue
        # reassign the residue numbers of the requested range
        cmd.alter("%s and resi %s" % (p, resi_range),
                  "resi=str(int(resi)-%s)" % shift)

    # update pymol
    cmd.rebuild()
def get_sequence(obj):
    """Return the sequence of all chains of ``obj`` as an array of
    three-letter residue names.

    The FASTA text of each chain is stripped of its first token (the
    header) and concatenated; each one-letter code is then translated. A
    letter outside the 20 standard amino acids raises KeyError.
    """
    one_letter_codes = "ACDEFGHIKLMNPQRSTVWY"
    three_letter_codes = (
        "ALA CYS ASP GLU PHE GLY HIS ILE LYS LEU "
        "MET ASN PRO GLN ARG SER THR VAL TRP TYR"
    ).split()
    to_three = dict(zip(one_letter_codes, three_letter_codes))

    fragments = []
    for chain in cmd.get_chains(obj):
        fasta = cmd.get_fastastr(f'{obj} and chain {chain}')
        fragments.append(''.join(fasta.split()[1:]))

    return np.asarray([to_three[code] for code in ''.join(fragments)])
def testAsyncBuilds(self, rep, async_builds):
    """Time showing representation ``rep`` on per-chain objects of 1aon."""
    target = "1aon"
    msg = ('%s cpus' % max_threads) if async_builds else '1 cpu'
    cmd.set("async_builds", async_builds)

    # one object per chain, then drop the combined object
    cmd.load(self.datafile('1aon.pdb.gz'), target)
    for chain_id in cmd.get_chains():
        cmd.create("Chain_%s" % chain_id, target + " & c. " + chain_id)
    cmd.delete(target)

    with self.timing('%s' % msg):
        cmd.show_as(rep)
        cmd.draw()
def save_colored_fasta(filename, selection='(all)', gapped=1, quiet=1):
    '''
DESCRIPTION

    Save a html file with colored (by C-alpha atoms) fasta sequence.

ARGUMENTS

    filename = string: path of the html file to write

    selection = string: atom selection {default: (all)}

    gapped = 0/1: fill missing residue numbers with dashes {default: 1}
    '''
    from . import one_letter
    from pymol import Scratch_Storage
    gapped = int(gapped)
    selection = '(%s) and polymer and guide' % (selection)
    html = []
    stored = Scratch_Storage()

    def callback(resv, resn, color):
        # Emit one residue letter, starting a new numbered line every 70
        # residue numbers and a new <font> whenever the colour changes.
        if stored.resv is None:
            stored.resv = resv - (resv % 70)
        if gapped:
            while stored.resv + 1 < resv:
                callback(stored.resv + 1, '-', 25)
        stored.resv += 1
        if stored.resv % 70 == 1:
            # &nbsp; keeps the padding of the residue number in HTML
            html.append(('</font>\n<br>%4d <font>' % (resv)).replace(' ', '&nbsp;'))
            stored.color = None
        c = cmd.get_color_tuple(color)
        # %x needs integers on Python 3; the colour components are floats
        color = '#%02x%02x%02x' % (int(c[0] * 255), int(c[1] * 255), int(c[2] * 255))
        aa = one_letter.get(resn, '-')
        if color != stored.color:
            html.append('</font><font color="' + color + '">')
            stored.color = color
        html.append(aa)

    for obj in cmd.get_object_list('(' + selection + ')'):
        for chain in cmd.get_chains('model %s and (%s)' % (obj, selection)):
            sele = 'model %s and chain "%s" and (%s)' % (obj, chain, selection)
            html.append('\n<br>>%s_%s<font>' % (obj, chain))
            stored.resv = None if gapped else 0
            stored.color = None
            cmd.iterate(sele, 'callback(resv, resn, color)', space=locals())
            html.append('</font>')

    # "with" closes the file even if writing fails
    with open(filename, 'w') as handle:
        print('<html><body style="font-family:monospace">', file=handle)
        print(''.join(html), file=handle)
        print('</body></html>', file=handle)
def chain_contact():
    """Collect chain-chain contacts (within 5 Angstrom) in the selection
    "chain_contact", colour it and return the list of "Chain-<id>" names.

    Returns False (after showing a notice) if fewer than two chains exist.
    """
    def add_contacts(anchor, used, labels):
        # Pair labels[anchor + 1] with every partner index that is neither
        # the anchor's successor nor listed in "used".
        idx = 0
        limit = anchor + 1
        while len(labels) > limit and (26 - idx) >= 0:
            first = labels[idx]
            second = labels[anchor + 1]
            cmd.select('chain_contact', '%s w. 5 of %s' % (first, second),
                       enable=0, quiet=1, merge=1)
            cmd.select('chain_contact', '%s w. 5 of %s' % (second, first),
                       enable=0, quiet=1, merge=1)
            idx += 1
            limit += 1
            while idx == (anchor + 1) or idx in used:
                idx += 1

    glb.update()
    cmd.hide('everything')
    cmd.show('mesh', 'all')
    cmd.color('gray40', 'all')
    # result was never used in the original either; call kept as is
    cmd.get_names('all')

    labels = []
    for chain_id in cmd.get_chains(quiet=1):
        labels.append('Chain-' + chain_id)
    if len(labels) < 2:
        showinfo(
            'Notice',
            'There needs to be two or more chains to run this functions.')
        return False

    used = []
    for anchor in range(min(len(labels) - 1, 26)):
        used.append(anchor + 1)
        add_contacts(anchor, used, labels)

    glb.procolor('chain_contact', 'mesh', 'cpk', None)
    cmd.delete('chain_contact')
    return labels
def load_pdb(self, num_proteins_seen, screenshotting, pdb_file_name=""):
    """Load (or fetch) a structure, undock it and return tensors for training.

    Returns the tuple (center_of_mass_dict, num_steps, undocked, features,
    original, chains): the centres of mass for "all" and every chain, the
    number of undock steps, the coordinates after and before undocking as
    float32 tensors, the per-atom features and the chain identifiers.
    """
    cmd.delete("all")  # prevent memory issues

    # choose a pdb id
    # NOTE(review): nothing is assigned when num_proteins_seen equals
    # len(self.pedagogy) or when pdb_file_name is given, so the previous
    # self.pdb_file_name is reused - confirm that this is intended.
    pedagogy_size = len(self.pedagogy)
    if num_proteins_seen > pedagogy_size:
        self.pdb_file_name = random.sample(self.pedagogy, 1)[0]
    elif num_proteins_seen < pedagogy_size and pdb_file_name == "":
        self.pdb_file_name = self.pedagogy[num_proteins_seen]

    # fetch or load pdb
    # NOTE(review): existence is checked under ./pdbs but fetched files go
    # to ./inputs/pdbs - verify the paths.
    self.pdb_file_path = "./pdbs/" + self.pdb_file_name + ".pdb"
    if os.path.exists(self.pdb_file_path):
        cmd.load(self.pdb_file_path)
    else:
        cmd.fetch(self.pdb_file_name, path="./inputs/pdbs", type="pdb")
    cmd.remove("solvent")

    # summarize
    print(self.params.run_time_stamp, " is loading ", num_proteins_seen, self.pdb_file_path)
    print("")
    cmd.count_atoms("all")  # result unused in the original as well
    print("noise mean", self.params.noise_mean, "noise scale", self.params.noise_scale)

    # convert pdb2tensor
    original_model = cmd.get_model('all', 1)
    original_coords = np.array(cmd.get_model('all', 1).get_coord_list())
    original = tf.convert_to_tensor(original_coords, dtype=tf.float32)
    chains = cmd.get_chains()

    if screenshotting:
        self.current_pdb_screenshot_path = (
            self.params.screenshot_folder_path + "/" + self.pdb_file_name
            + "-" + str(num_proteins_seen) + "/")
        os.makedirs(self.current_pdb_screenshot_path)
        prepare_pymol()
        take_screenshot(self.params, self.pdb_file_name, num_proteins_seen, "0")

    num_steps = random.randint(self.params.min_steps_in_undock,
                               self.params.max_steps_in_undock)
    self.undock(num_proteins_seen, screenshotting, num_steps, chains)
    undocked_coords = np.array(cmd.get_model('all', 1).get_coord_list())
    undocked = tf.convert_to_tensor(undocked_coords, dtype=tf.float32)

    # calculate center of mass dict
    self.center_of_mass_dict = AttrDict()
    self.center_of_mass_dict["all"] = cmd.centerofmass("all")
    for chain in chains:
        self.center_of_mass_dict[chain] = cmd.centerofmass("chain {}".format(chain))

    # per-atom features come from the model read before undocking
    feature_rows = [self.extract(atom) for atom in original_model.atom]
    features = tf.convert_to_tensor(np.array(feature_rows), dtype=tf.float32)

    # outputs
    return (self.center_of_mass_dict, num_steps, undocked, features, original, chains)
def save_colored_fasta(filename, selection='(all)', gapped=1, quiet=1):
    '''
DESCRIPTION

    Save a html file with colored (by C-alpha atoms) fasta sequence.

ARGUMENTS

    filename = string: path of the html file to write

    selection = string: atom selection {default: (all)}

    gapped = 0/1: fill missing residue numbers with dashes {default: 1}
    '''
    from . import one_letter
    from pymol import Scratch_Storage
    gapped = int(gapped)
    selection = '(%s) and polymer and guide' % (selection)
    html = []
    stored = Scratch_Storage()

    def callback(resv, resn, color):
        # Emit one residue letter, starting a new numbered line every 70
        # residue numbers and a new <font> whenever the colour changes.
        if stored.resv is None:
            stored.resv = resv - (resv % 70)
        if gapped:
            while stored.resv+1 < resv:
                callback(stored.resv+1, '-', 25)
        stored.resv += 1
        if stored.resv % 70 == 1:
            # &nbsp; keeps the padding of the residue number in HTML
            html.append(('</font>\n<br>%4d <font>' % (resv)).replace(' ', '&nbsp;'))
            stored.color = None
        c = cmd.get_color_tuple(color)
        # %x needs integers on Python 3; the colour components are floats
        color = '#%02x%02x%02x' % (int(c[0]*255), int(c[1]*255), int(c[2]*255))
        aa = one_letter.get(resn, '-')
        if color != stored.color:
            html.append('</font><font color="' + color + '">')
            stored.color = color
        html.append(aa)

    for obj in cmd.get_object_list('(' + selection + ')'):
        for chain in cmd.get_chains('model %s and (%s)' % (obj, selection)):
            sele = 'model %s and chain "%s" and (%s)' % (obj, chain, selection)
            html.append('\n<br>>%s_%s<font>' % (obj, chain))
            stored.resv = None if gapped else 0
            stored.color = None
            cmd.iterate(sele, 'callback(resv, resn, color)', space=locals())
            html.append('</font>')

    # "with" closes the file even if writing fails
    with open(filename, 'w') as handle:
        print('<html><body style="font-family:monospace">', file=handle)
        print(''.join(html), file=handle)
        print('</body></html>', file=handle)
def creatDilutedObj(obj, ratio=0.5, name="ASUbase"):
    '''
    Receive an object and create a new object named ``name`` from a thinned
    out list of its residue identifiers.

    The resi of every atom (chain by chain) of the first object matching
    ``obj`` is collected and every ceil(1/ratio)-th entry is kept for the
    "resi" selection of the new object. Returns ``name``.
    '''
    source = cmd.get_names('objects', 1, obj)[0]
    chain_ids = cmd.get_chains('%s and (%s)' % (source, "all"))

    # per-atom residue identifiers of all chains, flattened
    resIndx = []
    for chain_id in chain_ids:
        chain_model = cmd.get_model(source + " and chain " + chain_id)
        resIndx.extend(at.resi for at in chain_model.atom)

    step = np.ceil(1/ratio).astype(int)
    diluteResList = resIndx[::step]
    diluteResSele = '+'.join(diluteResList)

    print(diluteResSele)
    print("resIndx: ", len(resIndx))
    print("diluteResList: ", len(diluteResList))
    cmd.create(name, source + " and resi " + diluteResSele)
    print("Creating an object with ", len(diluteResList), " atom from the ", len(resIndx), " atom containing input object")
    return name
def get_chain_bb(selection):
    """
    Return a nested dictionary {object: {chain: list-of-bb-atom-ids}} with
    the identifiers of all atoms named "BB", per object and chain.
    Atoms with an empty chain identifier are filed under "*".
    """
    bb_name = "BB"
    bb_beads = {}
    for obj in cmd.get_names(selection=selection):
        chains = cmd.get_chains(obj)
        per_chain = bb_beads[obj] = {}
        for chain_id in chains:
            # an empty chain goes into the "*" bin
            key = chain_id or "*"
            per_chain[key] = cmd.identify(f"{obj} and chain {key} and name {bb_name}")
    return bb_beads
def testFetchLocal(self):
    """Fetch entries from a local file:// mirror and check the object names."""
    # urlparse moved to urllib.parse in Python 3
    try:
        import urllib.parse as urlparse
    except ImportError:
        import urlparse

    with testing.mkdtemp() as fetch_path:
        names = []
        cmd.set('fetch_path', fetch_path)
        cmd.set('fetch_host', urlparse.urlunsplit(['file', '', self.datafile('pdb.mirror'), '', '']))

        # whole entry
        cmd.fetch('1avy')
        names += ['1avy']
        self.assertItemsEqual(cmd.get_names(), names)

        # entry code with chain suffix
        cmd.fetch('1avyB')
        names += ['1avyB']
        self.assertItemsEqual(cmd.get_names(), names)
        self.assertEqual(cmd.get_chains('1avyB'), ['B'])

        # multiplexed fetch: one object per state
        cmd.fetch('1aq5', multiplex=1)
        names += ['1aq5_%04d' % (i+1) for i in range(20)]
        self.assertItemsEqual(cmd.get_names(), names)
def load(type, id, screenshot=False):
    """Load a dataset entry into PyMOL and build a training example.

    ``type`` selects the dataset kind (only "cif" is loaded here) and
    ``id`` the entry below ``DATASETS_ROOT/<type>``; a missing cif file is
    fetched. Returns False if the structure has no chains or more than
    MAX_ATOMS atoms, otherwise the result of ``make_example``.

    String comparisons use ``==``; the previous ``is`` comparisons only
    worked by accident of string interning.
    """
    # we clear pymol, make filename and path
    cmd.delete('all')
    file_name = f'{id}.{type}' if type == 'cif' else id
    dataset_path = os.path.join(DATASETS_ROOT, type)
    path = os.path.join(dataset_path, file_name)

    # we load the target
    print(f'load {id} {path}')
    if type == "cif":
        if os.path.exists(path):
            cmd.load(path)
        else:
            cmd.fetch(id, path=dataset_path)

    # make target
    clean_pymol()
    model = cmd.get_model('all', 1)
    target = get_positions(model)

    # make the model inputs
    if screenshot:
        prepare_pymol()
        take_screenshot("0")
    chains = cmd.get_chains('all')
    if not chains:
        return False
    if random.random() < P_UNDOCK:
        print('undocking')
        undock(chains, type)
    if random.random() < P_UNFOLD:
        print('unfolding')
        unfold(chains)

    # re-read the model: undock/unfold may have moved the atoms
    model = cmd.get_model('all', 1)
    positions = get_positions(model)
    if positions.shape[0] > MAX_ATOMS:
        print(f"{positions.shape[0]} IS TOO MANY ATOMS")
        return False

    features = get_features(model)
    masses = get_masses(model)
    numbers = get_numbers(model)
    features = tf.concat([features, masses, numbers], -1)
    target = tf.concat([target, features], -1)
    return make_example(id, target, positions, features, masses)
def split_chains(selection='(all)', prefix=None):
    '''
DESCRIPTION

    Create a single object for each chain in selection. Every source
    object is disabled after its chains have been copied.

ARGUMENTS

    selection = string: atom selection {default: (all)}

    prefix = string: if given, new objects are named <prefix>NNNN with a
    running four-digit counter, otherwise <object>_<chain>

SEE ALSO

    split_states
    '''
    count = 0
    models = cmd.get_object_list('(' + selection + ')')
    for model in models:
        for chain in cmd.get_chains('(%s) and model %s' % (selection, model)):
            # an empty chain identifier would otherwise produce the invalid
            # selection "... and chain "
            if chain == '':
                chain = "''"
            count += 1
            if not prefix:
                name = '%s_%s' % (model, chain)
            else:
                name = '%s%04d' % (prefix, count)
            cmd.create(name, '(%s) and model %s and chain %s' % (selection, model, chain))
        cmd.disable(model)
def split_chains(selection="(all)", prefix=None):
    """
DESCRIPTION

    Create a single object for each chain in selection

ARGUMENTS

    selection = string: atom selection {default: (all)}

    prefix = string: if given, new objects are named <prefix>0001,
    <prefix>0002, ... (one running counter over all models); otherwise
    they are named <model>_<chain>

NOTES

    Every source model is disabled after its chains have been created.

SEE ALSO

    split_states
    """
    count = 0
    models = cmd.get_object_list("(" + selection + ")")
    for model in models:
        for chain in cmd.get_chains("(%s) and model %s" % (selection, model)):
            if chain == "":
                # an empty chain identifier must be written as '' in the
                # selection language, otherwise "chain " is a syntax error
                chain = "''"
            count += 1
            if not prefix:
                name = "%s_%s" % (model, chain)
            else:
                name = "%s%04d" % (prefix, count)
            cmd.create(
                name,
                "(%s) and model %s and chain %s" % (selection, model, chain))
        cmd.disable(model)
def fasta(selection='(all)', gapped=1, wrap=70):
    '''
DESCRIPTION

    Print sequence in FASTA format, one record per object and chain

ARGUMENTS

    selection = string: atom selection {default: all}

    gapped = integer: put missing residues as dashes into the sequence {default: 1}

    wrap = integer: wrap lines, 0 for no wrapping {default: 70}

SEE ALSO

    pir, pymol.exporting.get_fastastr
    '''
    from . import one_letter

    gapped = int(gapped)
    wrap = int(wrap)
    selection = '(%s) and guide' % (selection)

    # larger than any residue number, so no gap is emitted before the
    # first residue of a chain
    no_previous = 999999999

    for obj in cmd.get_object_list(selection):
        for chain in cmd.get_chains('%s and (%s)' % (obj, selection)):
            ca_model = cmd.get_model(
                '/%s//%s//CA and (%s)' % (obj, chain, selection))

            letters = []
            last_number = no_previous
            for atom in ca_model.atom:
                if gapped:
                    # one dash per residue number skipped since the
                    # previous residue
                    missing = max(0, atom.resi_number - last_number - 1)
                    letters += ['-'] * missing
                    last_number = atom.resi_number
                letters.append(one_letter.get(atom.resn, 'X'))

            print('>%s_%s' % (obj, chain))
            if wrap < 1:
                print(''.join(letters))
            else:
                for start in range(0, len(letters), wrap):
                    print(''.join(letters[start:start + wrap]))
def marshmallows(sel):
    """Draw every chain of `sel` as a smooth gaussian-map isosurface.

    Removes existing surf*/map* objects and all hydrogens, resets b and q
    for all atoms, then creates one map (map<chain>) and one isosurface
    (surf<chain>) per chain and applies flat ray-tracing settings.
    """
    for stale in ("surf*", "map*"):
        cmd.delete(stale)

    palette = ("green", "cyan", "magenta", "yellow", "pink") * 3

    setup_commands = (
        "remove hydro",
        "bg_color white",
        "hide everything",
        "set surface_quality, 1",
        "alter all, b=50",
        "alter all, q=1",
        "set gaussian_resolution,10",
    )
    for command in setup_commands:
        cmd.do(command)

    # NOTE(review): because of `index % 2` only the first two palette
    # entries (green, cyan) are ever used; confirm this alternation is
    # intended.
    for index, chain in enumerate(cmd.get_chains(sel)):
        colour = palette[index % 2]
        cmd.do("map_new map%s, gaussian, 1, (%s and chain %s), 10"
               % (chain, sel, chain))
        cmd.do("isosurface surf%s, map%s" % (chain, chain))
        cmd.do("color %s, surf%s" % (colour, chain))

    render_commands = (
        "set antialias, 2",
        "set ray_trace_gain, 0.4",
        "set ray_shadows, 0",
        "set specular, 0",
        "show surface, surf*",
    )
    for command in render_commands:
        cmd.do(command)
def select_gp160(alignment_score_cutoff = 800):
    '''
DESCRIPTION

    Makes alignment groups for gp160 molecules. For every chain in the
    session the set of (resi, resn) pairs is collected, passed through
    get_sane_pairing() and handed to align().

ARGUMENTS

    alignment_score_cutoff - default 800, the alignment score that all the
    chains must pass in order to be considered as a gp160 molecule. If you
    get no alignments, try to lower this number.

USAGE

    select_gp160 [alignment_score_cutoff=int]
    '''
    for chain_id in cmd.get_chains():
        residues = set()
        # the iterate expression refers to the set by the name 'myset'
        cmd.iterate('chain {0}'.format(chain_id),
                    'myset.add((resi,resn))',
                    space={'myset': residues})
        pairing = get_sane_pairing(residues)
        align(pairing, chain_id, ascutoff=alignment_score_cutoff)
def flatten_obj(name="",selection="",state=0,rename=0,quiet=1,chain_map=""):
    """
DESCRIPTION

    "flatten_obj" combines multiple objects or states into a single object,
    renaming chains where required

USAGE

    flatten_obj name, selection[, state[, rename[, quiet[, chain_map]]]]

ARGUMENTS

    name = a unique name for the flattened object {default: flat}

    selection = the set of objects to include in the flattening. The selection
        will be expanded to include all atoms of objects. {default: all}

    state = the source state to select. Use 0 or -1 to flatten all states {default: 0}

    rename = The scheme to use for renaming chains: {default: 0}
        (0) preserve chains IDs where possible, rename other chains
            alphabetically
        (1) rename all chains alphabetically
        (2) rename chains using the original chain letter, object name, and state

    quiet = If set to 0, print some additional information about progress and
        chain renaming {default: 1}

    chain_map = An attribute name for the 'stored' scratch object. If
        specified, `stored.<chain_map>` will be populated with a dictionary
        mapping the new chain names to a tuple giving the originated object,
        state, and chainID. {default: ""}

NOTES

    Like the select command, if name is omitted then the default object name
    ("flat") is used as the name argument.

    Chain renaming is tricky. PDB files originally limited chains to single
    letter identifiers containing [A-Za-z0-9]. When this was found to be
    limiting, multi-letter chains (ideally < 4 chars) were allowed. This is
    supported as of PyMOL 1.7. Earlier versions do not accept rename=2, and
    will raise an exception when flattening a structure with more than 62
    chains.

    Intermediate objects named temp_* are created and deleted again, and the
    retain_order setting is switched off temporarily and restored afterwards.

EXAMPLES

    flatten_obj flat, nmrObj
    flatten_obj ( obj1 or obj2 )

SEE ALSO

    split_states
    """

    # arguments

    # Single argument; treat as selection
    if name and not selection:
        selection = name
        name = ""
    # default name and selection
    if not name:
        name = "flat"
    if not selection:
        selection = "(all)"

    state = int(state)
    rename = int(rename)
    quiet = int(quiet)

    # Wrap in extra parentheses for get_object_list
    selection = "( %s )" % selection

    if rename == 0:
        chainSet = DefaultChainSet()
    elif rename == 1:
        chainSet = SequentialChainSet()
    elif rename == 2:
        chainSet = LongChainSet()
    else:
        raise ValueError("Unrecognized rename option (Valid: 0,1,2)")

    metaprefix = "temp" #TODO unique prefix

    # store original value of retain_order, which causes weird interleaving of
    # structures if enabled.
    retain_order = cmd.get("retain_order")
    try:
        cmd.set("retain_order",0)

        # create new object for each state
        for obj in cmd.get_object_list(selection):
            if state <= 0:
                # all states
                prefix = "%s_%s_"%(metaprefix,obj)
                cmd.split_states(obj,prefix=prefix)
            else:
                prefix = "%s_%s_%04d"%(metaprefix,obj,state)
                cmd.create(prefix, obj, state, 1)

        # matches split object names: <metaprefix>_<object>_<state>
        # (raw string, so that \d is not an invalid string escape)
        statere = re.compile(r"^%s_(.*)_(\d+)$" % metaprefix)

        # Iterate over all objects with metaprefix
        try:
            for obj in cmd.get_object_list("(%s_*)"%(metaprefix) ):
                m = statere.match(obj)
                if m is None:
                    print("Failed to match object %s" %obj)
                    continue
                origobj = m.group(1)
                statenum = int(m.group(2))

                chains = cmd.get_chains(obj)

                rev_chain_map = {} #old -> new, for this obj only
                # shortest chain IDs first, then alphabetically
                for chain in sorted(chains,key=lambda x:(len(x),x)):
                    new_chain = chainSet.map_chain(origobj,statenum,chain)
                    rev_chain_map[chain] = new_chain
                    if not quiet:
                        print(" %s state %d chain %s -> %s"%(origobj,statenum,chain, new_chain))
                    if not _long_chains:
                        if len(new_chain) > 1:
                            raise OutOfChainsError("No additional chains available (max 62).")

                space = {'rev_chain_map':rev_chain_map}
                cmd.alter(obj,"chain = rev_chain_map[chain]",space=space)

            print("Creating object from %s_*"%metaprefix)
            # Recombine into a single object
            cmd.create(name,"%s_*"%metaprefix)

            # Set chain_map
            if chain_map:
                setattr(stored,chain_map,chainSet)

            # Warn if lowercase chains were generated
            if cmd.get("ignore_case") == "on" and any(c.upper() != c for c in chainSet.keys()):
                print("Warning: using lower-case chain IDs. Consider running the "
                      "following command:\n set ignore_case, 0" )

        finally:
            # Clean up
            print("Cleaning up intermediates")
            cmd.delete("%s_*"%metaprefix)
    finally:
        # restore original parameters
        print("Resetting variables")
        cmd.set("retain_order",retain_order)
def dyndom(mobile, target, window=5, domain=20, ratio=1.0, exe='', transform=1,
        quiet=1, mobile_state=1, target_state=1, match='align', preserve=0):
    '''
DESCRIPTION

    DynDom wrapper

    DynDom is a program to determine domains, hinge axes and hinge bending
    residues in proteins where two conformations are available.

    http://fizz.cmp.uea.ac.uk/dyndom/

USAGE

    dyndom mobile, target [, window [, domain [, ratio ]]]

ARGUMENTS

    mobile, target = string: atom selections, each must span exactly one chain

    window, domain, ratio = written as-is to the DynDom command file

    exe = string: path to the DynDom executable {default: search for
    "DynDom" or "dyndom"}

    transform = 0/1: align the fixed domain onto target afterwards {default: 1}

    quiet = 0/1: if 0, echo the DynDom output {default: 1}

    mobile_state, target_state = integer: object states to use {default: 1}

    match = string: matching method passed to MatchMaker {default: align}

    preserve = 0/1: keep the temporary working directory {default: 0}
    '''
    import os
    import shutil
    import subprocess
    import sys
    import tempfile
    from .exporting import save_pdb_without_ter

    window, domain, ratio = int(window), int(domain), float(ratio)
    transform, quiet = int(transform), int(quiet)
    mobile_state, target_state = int(mobile_state), int(target_state)

    mm = MatchMaker(
        '(%s) & polymer & state %d' % (mobile, mobile_state),
        '(%s) & polymer & state %d' % (target, target_state), match)

    chains = cmd.get_chains(mm.mobile)
    if len(chains) != 1:
        print('mobile selection must be single chain')
        raise CmdException
    chain1id = chains[0]
    chains = cmd.get_chains(mm.target)
    if len(chains) != 1:
        print('target selection must be single chain')
        raise CmdException
    chain2id = chains[0]

    if not exe:
        from . import which
        exe = which('DynDom', 'dyndom')
        if not exe:
            print(' Error: Cannot find DynDom executable')
            raise CmdException
    else:
        exe = cmd.exp_path(exe)

    tempdir = tempfile.mkdtemp()

    try:
        filename1 = os.path.join(tempdir, 'mobile.pdb')
        filename2 = os.path.join(tempdir, 'target.pdb')
        commandfile = os.path.join(tempdir, 'command.txt')
        infofile = os.path.join(tempdir, 'out_info')

        save_pdb_without_ter(filename1, mm.mobile, state=mobile_state)
        save_pdb_without_ter(filename2, mm.target, state=target_state)

        # "with" closes the command file even if the write fails
        with open(commandfile, 'w') as f:
            f.write('title=out\nfilename1=%s\nchain1id=%s\nfilename2=%s\nchain2id=%s\n'
                    'window=%d\ndomain=%d\nratio=%.4f\n' % (filename1, chain1id,
                        filename2, chain2id, window, domain, ratio))

        process = subprocess.Popen([exe, commandfile], cwd=tempdir,
                universal_newlines=True,
                stderr=subprocess.STDOUT, stdout=subprocess.PIPE)

        for line in process.stdout:
            if not quiet:
                sys.stdout.write(line)

        # EOF on stdout does not guarantee that the process has been reaped;
        # poll() may still return None at this point, so wait for the real
        # exit status.
        returncode = process.wait()
        if returncode != 0:
            raise CmdException('"%s" failed with status %d' % (exe, returncode))

        cmd.color('gray', mobile)
        fixed_name = dyndom_parse_info(infofile, mm.mobile, quiet)
    except OSError:
        print('Cannot execute "%s", please provide full path to DynDom executable' % (exe))
        raise CmdException
    finally:
        if not int(preserve):
            shutil.rmtree(tempdir)
        elif not quiet:
            print(' Not deleting temporary directory:', tempdir)

    if transform and fixed_name is not None:
        cmd.align(fixed_name, target)
def nmrSpecCalcSphere(center, radius, **kwargs):
    """
    Interface to sphere selection.

    Builds a selection of all residues within `radius` of the residues
    touched by `center`, decides which spectra to calculate from the N/C
    atoms in (or next to) `center`, and passes everything to _nmrSpecCalc.
    Progress and warnings are printed; nothing is returned.

    @param center: the center of the selection shell (spherical).
                   Center is a PyMOL selection. It may contain protons or
                   heavy atoms. The residues of the atoms are the real
                   center for selection shell!
    @param radius: radius of the selection sphere
    @param kwargs: forwarded to _nmrSpecCalc. If 'spectra' is not given it
                   is filled in automatically; a '_self' entry is removed.
    """
    radius = float(radius)
    # get the parent objects
    olist = cmd.get_object_list(center)
    if len(olist) == 0:
        print "ERROR: No objects in center selection:", center
        return
    elif len(olist) > 1:
        print "WARNING: There are multiple objects in the center selection:"
        for o in olist:
            print o
        print " Only the 1st object is considered:", olist[0]
    obj = olist[0]
    # get the parent chains
    clist = cmd.get_chains(center)
    if len(clist) == 0:
        print "INFO: No chain names detected in center selection:", center
        print " Consider chain name empty."
        # a single blank marks "no chain"; it is tested again below
        clist = [" "]
    elif len(clist) > 1:
        print "WARNING: There are multiple chains in the center selection:"
        for c in clist:
            print c
        print " Only the 1st chain is considered:", clist[0]
    chn = clist[0]
    # get parent residues
    # all residues involved in center are taken as center for
    # selection sphere
    stored.list = []
    cmd.iterate(center, 'stored.list.append(resi)' ) # do not use the n. CA trick because there may be no CA in the center selection
    # unique residue identifiers, first-seen order preserved
    unik_resi = []
    for i in stored.list:
        if i not in unik_resi:
            unik_resi.append(i)
    # selection macro: object//"chain"/resi+resi+.../
    center_res = "%s//\"%s\"/%s/" % (obj, chn.strip(), '+'.join(unik_resi))
    # save selection to temp pdb output
    if chn == " ":
        # no chain name: do not restrict the sphere to a chain
        sel = "%s and (byres %s expand %d)" % \
            (obj, center_res, radius)
    else:
        sel = "%s and chain \"%s\" and (byres %s expand %d)" % \
            (obj, chn, center_res, radius)
    # get selected N/C atoms (for deciding which spectra to calc)
    stored.list = []
    cmd.iterate(center, 'stored.list.append(name)')
    # atoms whose name starts with C or N
    NC_atoms = [n for n in stored.list if n[0] in ['C', 'N']]
    if not NC_atoms: # if no N/C atoms found, expand to connected atoms
        print 'INFO: No N/C atoms found in center selection.'
        print 'INFO: Try to examine atoms covalently bonded to center selection.'
        conn_heavy_atoms = '(' + center + ' around 1.2) and (not hydro)' # use around to exclude center
        stored.list = []
        cmd.iterate(conn_heavy_atoms, 'stored.list.append(name)')
        NC_atoms = [n for n in stored.list if n[0] in ['C', 'N']]
    # find out which spectra to calc (only if parameter 'spectra' is not specified)
    has_N, has_C = False, False
    print 'INFO: N/C atoms in the center selection or atoms covalently bonded:', ','.join(NC_atoms)
    for a in NC_atoms:
        if a[0] == 'N':
            has_N = True
        elif a[0] == 'C':
            has_C = True
    spectra_to_calc = []
    if has_N and has_C:
        spectra_to_calc = ['HNH','CNH','NNH','HCH','CCH']
    elif has_N:
        spectra_to_calc = ['HNH','CNH','NNH']
    elif has_C:
        spectra_to_calc = ['HCH','CCH']
    else:
        print 'WARNING: No N/C atoms found in center selection or atoms covalently bonded.'
        print 'WARNING: All spectra (HNH,CNH,NNH,HCH,CCH) will be tried.'
        spectra_to_calc = ['HNH','CNH','NNH','HCH','CCH']
    if 'spectra' not in kwargs:
        kwargs['spectra'] = spectra_to_calc
        print 'INFO: Spectra to calc:', ','.join(spectra_to_calc)
    else:
        # a user-supplied 'spectra' value overrides the automatic choice
        print 'INFO: Spectra to calc (as user specified):', ','.join(kwargs['spectra'])
    # get protons in center
    # if there are any heavy atoms in center, the proton connected are included
    # use distance cutoff to find hydrogens within distance of 1.2
    # PyMOL itself uses distance as criterion to detect covalent bonds
    center_hydro = '(' + center + ' expand 1.2) and hydro' # use expand to include center
    stored.list = []
    cmd.iterate(center_hydro, 'stored.list.append((resi, resn, name))')
    print "INFO: hydrogen atoms in the center:"
    for h in stored.list:
        print ' ',h
    try:
        del kwargs['_self'] # from PyMOL
    except KeyError:
        pass
    _nmrSpecCalc(sel=sel, obj=obj, chn=chn, cth=center_hydro, **kwargs)
    return
def contactsDialog(root):
    """Create GUI

    Builds a non-resizable Tk dialog (child of `root`) that lets the user
    pick a CASP RR contact file, a target object/chain, the minimum
    sequence separation, the number of contacts and the atom mappings.
    "Show" validates the input and calls show_contacts(); "Close" destroys
    the window. The file browser is opened immediately after the dialog
    has been built.
    """
    PADDING=5
    win = Toplevel(root, width=400, height=600, padx=PADDING, pady=PADDING)
    win.resizable(0,0)
    win.title("Visualize CASP RR contacts")

    #### MAIN CONTROLS ####
    frmMain = Frame(win)
    frmMain.pack(fill=BOTH, expand=1)
    # three grid columns: label, main widget, browse button
    frmMain.columnconfigure(0, weight=2, pad=PADDING)
    frmMain.columnconfigure(1, weight=5, pad=PADDING)
    frmMain.columnconfigure(2, weight=1, pad=PADDING)

    # row 0: contact file entry with a "..." browse button
    Label(frmMain, text="RR file:").grid(row=0,column=0,sticky=N+E)
    vTarget = StringVar(win)
    txtTarget = Entry(frmMain, textvariable=vTarget)
    txtTarget.grid(row=0, column=1, sticky=W+E)
    def browseTarget():
        # ask for a file; keep the current value if the user cancels
        cf = tkFileDialog.askopenfilename(parent=win, filetypes=[("CASP RR files", ".CASPRR")])
        if cf:
            vTarget.set(cf)
    cmdTarget = Button(frmMain, text="...", command=browseTarget)
    cmdTarget.grid(row=0, column=2, sticky=W+E)

    # row 1: one list entry per (molecule object, chain) pair
    Label(frmMain, text="Target:").grid(row=1,column=0,sticky=N+E)
    lstObject = Listbox(frmMain, selectmode=SINGLE, exportselection=0, height=6)
    # objects[i] is the (object, chain) tuple shown at list index i
    objects = []
    molecules = ( n for n in cmd.get_names() if cmd.get_type(n) == "object:molecule")
    for n in molecules:
        for ch in cmd.get_chains(n):
            objects.append((n, ch))
            lstObject.insert(END, "{0}/{1}".format(n,ch))
    if objects:
        lstObject.selection_set(0)
    lstObject.grid(row=1, column=1, columnspan=2, sticky=N+E+S+W)

    # row 2: minimum sequence separation slider (0-100, initially 23)
    Label(frmMain, text="Min. separation:").grid(row=2,column=0,sticky=E)
    vSeparation = IntVar(win)
    vSeparation.set(23)
    sclSeparation = Scale(frmMain, from_=0, to=100, orient=HORIZONTAL, variable=vSeparation)
    sclSeparation.grid(row=2, column=1, columnspan=2, sticky=W+E)

    # row 3: number of contacts slider (1-500, initially 25)
    Label(frmMain, text="Num. contacts:").grid(row=3,column=0,sticky=E)
    vContacts = IntVar(win)
    vContacts.set(25)
    sclContacts = Scale(frmMain, from_=1, to=500, orient=HORIZONTAL, variable=vContacts)
    sclContacts.grid(row=3, column=1, columnspan=2, sticky=W+E)

    # row 4: multi-select list of the keys of contact_atoms, sorted
    Label(frmMain, text="Use atoms:").grid(row=4,column=0,sticky=E)
    catms = sorted(contact_atoms.keys())
    lstAtomMappings = Listbox(frmMain, selectmode=MULTIPLE, exportselection=0, height=4)
    for n in catms:
        lstAtomMappings.insert(END, n)
    # pre-select the second entry
    lstAtomMappings.selection_set(1)
    lstAtomMappings.grid(row=4, column=1, columnspan=2, sticky=N+E+S+W)

    #### BUTTONS ROW ####
    frmButtons = Frame(win)
    frmButtons.pack(fill=X, expand=1)
    btnCancel = Button(frmButtons, text="Close", command=lambda: win.destroy())
    btnCancel.pack(side=RIGHT, pady=PADDING)

    def validate():
        # Return True only if a file, a target and at least one atom
        # mapping have been chosen; otherwise warn and return False.
        if not vTarget.get():
            tkMessageBox.showwarning("No CASP RR file", "Please specify a valid CASP RR file to visualize!")
            return False
        if not lstObject.curselection():
            tkMessageBox.showwarning("No Mapping Target", "Please specify a molecule to map contacts on!")
            return False
        if not lstAtomMappings.curselection():
            tkMessageBox.showwarning("No Atom Selection", "Please specify at least one set of atoms to map contacts on!")
            return False
        return True

    def confirm():
        # "Show" handler: read the widgets and draw the contacts.
        if not validate():
            return
        contactFile = vTarget.get()
        target, chain = objects[int(lstObject.curselection()[0])]
        num_contacts = vContacts.get()
        min_separation = vSeparation.get()
        #atom_mapping = contact_atoms[ vAtomMappings.get() ]
        # translate the selected list indices back to contact_atoms entries
        atom_mapping = [ contact_atoms[catms[int(k)]] for k in lstAtomMappings.curselection() ]
        print atom_mapping
        show_contacts(contactFile, target, chain, num_contacts, min_separation, atom_mapping)

    btnOK = Button(frmButtons, text="Show", command=confirm)
    btnOK.pack(side=RIGHT)

    # open the file browser right away
    browseTarget()
def format_bonds(
    selection='all',
    bonds=4,
):
    '''
DESCRIPTION

    Formats bonds in aromatic or charged residues

EXAMPLE

    frag PHE
    format_bonds

USAGE

    format_bonds [ selection [, bonds ]]

ARGUMENTS

    selection: <str> input selection {default: 'all'}

    bonds: <int> toggles format of bonds
           1: single bonds (deactivates valence display)
           2: regular double bonds (activates valence display)
           any other value: delocalized (activates valence display)

RETURNS

    False if the selection is invalid, otherwise the bond mode that was
    applied (1, 2 or 4).
    '''
    # Selection
    try:
        # group selection with bracketing and select complete residues
        selection = '(byres (' + str(selection) + '))'
        # checks functional selection
        cmd.count_atoms(selection)
    except Exception:
        print("invalid selection")
        return False

    # PARAMETERS: anything that is not 1 or 2 means "delocalized"
    try:
        bonds = int(bonds)
    except (TypeError, ValueError, OverflowError):
        pass
    if bonds not in (1, 2):
        bonds = 4

    if bonds == 1:
        cmd.set('valence', 0)
        print("Valence display disabled!")
        return bonds
    cmd.set('valence', 1)
    print("Valence display enabled!")

    ##### SELECTION BY OBJECT AND CHAIN #####
    # one sub-selection per chain of every object touched by the selection
    names = []
    for p in cmd.get_object_list(selection):
        for chain in cmd.get_chains('model ' + p) or ['']:
            names.append("(model %s and chain '%s')" % (p, chain))

    ##### RESIDUE QUERIES AND ATOM LISTS #####
    # (query with %s for the chain sub-selection,
    #  all bonds to format, bonds drawn as double in mode 2)
    residue_table = (
        # TRP
        ('(%s) and (resn TRP+NIW) and (name CA)',
         [['CG', 'CD1'], ['CD1', 'NE1'], ['NE1', 'CE2'], ['CE2', 'CD2'],
          ['CD2', 'CG'], ['CD2', 'CE3'], ['CE3', 'CZ3'], ['CZ3', 'CH2'],
          ['CH2', 'CZ2'], ['CZ2', 'CE2']],
         [['CG', 'CD1'], ['CE2', 'CD2'], ['CE3', 'CZ3'], ['CH2', 'CZ2']]),
        # PHETYR
        ('(%s) and (resn PHE+TYR+PTR+NIY+PNIY) and (name CA)',
         [['CG', 'CD1'], ['CD1', 'CE1'], ['CE1', 'CZ'], ['CZ', 'CE2'],
          ['CE2', 'CD2'], ['CD2', 'CG']],
         [['CG', 'CD1'], ['CE1', 'CZ'], ['CE2', 'CD2']]),
        # HIS
        ('(%s) and (resn HIS) and (name CA)',
         [['CG', 'CD2'], ['CD2', 'NE2'], ['NE2', 'CE1'], ['CE1', 'ND1'],
          ['ND1', 'CG']],
         [['CG', 'CD2'], ['CE1', 'ND1']]),
        # NITRO
        ('(%s) and (resn NIY+PNIY+NIW) and (name CA)',
         [['NN', 'O1'], ['NN', 'O2']],
         [['NN', 'O1']]),
        # GLU
        ('(%s) and (resn GLU) and (name CA)',
         [['CD', 'OE1'], ['CD', 'OE2']],
         [['CD', 'OE1']]),
        # ASP
        ('(%s) and (resn ASP) and (name CA)',
         [['CG', 'OD1'], ['CG', 'OD2']],
         [['CG', 'OD1']]),
        # CTERM
        ('(byres (last %s)) and (not (hetatm)) and (name OXT)',
         [['C', 'O'], ['C', 'OXT']],
         [['C', 'O']]),
        # ARG
        ('(%s) and (resn ARG) and (name CA)',
         [['CZ', 'NH1'], ['CZ', 'NH2']],
         [['CZ', 'NH1']]),
    )

    ##### SELECTION LISTS #####
    # Collect all residue selections before any bond is touched.
    targets = []
    for query, bonds_all, bonds_double in residue_table:
        stored.temp = []
        for p in names:
            # appends one "(<chain selection> and resi N)" string per hit
            cmd.iterate(
                query % (p),
                'stored.temp.append("(%s and resi "+str(resi)+")")' % p)
        targets.append((tuple(stored.temp), bonds_all, bonds_double))

    ##### FORMATING #####
    if bonds != 2:
        lines = 4
        print("Formating as delocalized bonds")
    else:
        lines = 1
        print("Formating as double bonds")

    def rebond(residue, atom1, atom2, order):
        # replace the bond between two named atoms of one residue
        sele1 = '%s and name %s' % (residue, atom1)
        sele2 = '%s and name %s' % (residue, atom2)
        cmd.unbond(sele1, sele2)
        cmd.bond(sele1, sele2, order)

    for residues, bonds_all, bonds_double in targets:
        for q in residues:
            # format bonds
            for atom1, atom2 in bonds_all:
                rebond(q, atom1, atom2, lines)
            if lines == 1:
                # add double bonds
                for atom1, atom2 in bonds_double:
                    rebond(q, atom1, atom2, 2)
    return bonds
def snp_common(record, selection, label, name, quiet):
    '''
    Common part of snp_uniprot and snp_ncbi.
    Argument `record' must be a Bio.SwissProt.Record object with `sequence',
    `entry_name' and `features' fields defined.

    For every chain in `selection' the record sequence is aligned to the CA
    sequence of the chain, and single-residue VARIANT features are mapped to
    residue numbers. The mapped residues are stored as a named selection
    (`name', or an unused "nsSNPs" name if `name' is empty) and, if `label'
    is non-zero, labelled with the variant descriptions.

    `selection' must cover exactly one object; otherwise a message is
    printed and nothing is done.
    '''
    from . import one_letter
    from .seqalign import needle_alignment, alignment_mapping

    label = int(label)
    quiet = int(quiet)
    pdbids = cmd.get_object_list(selection)
    chains = cmd.get_chains(selection)

    if len(pdbids) != 1:
        print('please select one object')
        return

    snpi_str = []
    labels = dict()

    for chain in chains:
        print('chain ' + chain)
        # variant residues of THIS chain only; a set shared across chains
        # would leak residue numbers of earlier chains into later ones
        snpi = set()
        res_list = []
        cmd.iterate('(%s) and chain %s and name CA' % (selection, chain),
                'res_list.append((resn,resv))', space={'res_list': res_list})
        seq = ''.join([one_letter.get(res[0], 'X') for res in res_list])
        align = needle_alignment(record.sequence, seq)
        if not quiet:
            align._records[0].id = record.entry_name
            align._records[1].id = pdbids[0] + '_' + chain
            print(align.format('clustal'))
        # record sequence index -> index into res_list
        map1 = dict(alignment_mapping(*align))
        for feature in record.features:
            # only variants that affect a single position
            if feature[0] != 'VARIANT' or feature[1] != feature[2]:
                continue
            i = feature[1]
            if (i-1) not in map1:
                if not quiet:
                    print('not mapped', feature)
                continue
            resi = res_list[map1[i-1]][1]
            snpi.add(resi)
            if not quiet:
                print('%s`%d' % res_list[map1[i-1]], feature[2:4])
            if label:
                labels.setdefault((chain, resi), []).append(feature[3].split(' (')[0])
        if len(snpi) > 0:
            snpi_str.append('(chain %s and resi %s)' % (chain, '+'.join(map(str, sorted(snpi)))))

    for chain, resi in labels:
        lab = ', '.join(labels[(chain, resi)])
        cmd.label('(%s) and chain %s and resi %d and name CA' % (selection, chain, resi), repr(lab))

    if len(snpi_str) == 0:
        print('no missense variants')
        return

    if name == '':
        name = cmd.get_unused_name('nsSNPs')
    cmd.select(name, '(%s) and (%s)' % (selection, ' or '.join(snpi_str)))
def color_chains(rainbow=0):
    """
AUTHOR

    Kevin Houlihan
    adapted from a script by Gareth Stockwell

USAGE

    color_chains(rainbow=0)

    This function colours each chain of every molecule object currently in
    PyMOL with a different colour and then applies util.cnc to it.
    Colours used are either a list of 40 named PyMOL colours (which is
    cycled through again if there are more chains than colours), or, with
    rainbow=1, hues spread between blue and red.

SEE ALSO

    util.color_objs()
    """

    # Process arguments
    rainbow = int(rainbow)

    # Get names of all molecule objects;
    # don't color selections, alignments, measurements, etc.
    obj_list = cmd.get_names_of_type("object:molecule")

    # one selection string per (object, chain)
    chain_list = []
    for obj in obj_list:
        for ch in cmd.get_chains(obj):
            # an empty chain identifier has to be written as ''
            chain_list.append(obj + " and c. " + (ch or "''"))

    if rainbow:
        nchain = len(chain_list)
        # a single chain would otherwise divide by zero
        span = max(nchain - 1, 1)

        # Create colours starting at blue(240) to red(0)
        for j in range(nchain):
            # disparate colors for adjacent objects in sequence, colors
            # heterodimers nicely
            hsv = (240 - ((120*(j - j%2))/span + 120*(j%2)), 1, 1)
            # Convert to RGB
            rgb = hsv_to_rgb(hsv)
            # Define the new colour
            cmd.set_color("col" + str(j), rgb)
            # Colour the object
            cmd.color("col" + str(j), chain_list[j])
            util.cnc(chain_list[j])
    else:
        # List of available colours
        # standard pymol colors, color sets listed at
        # http://www.pymolwiki.org/index.php/Color_Values
        mainset1_colours = ['carbon', 'cyan', 'lightmagenta', 'yellow', 'salmon', 'hydrogen', 'slate', 'orange']
        mainset2_colours = ['lime', 'deepteal', 'hotpink', 'yelloworange', 'violetpurple', 'grey70', 'marine', 'olive']
        mainset3_colours = ['smudge', 'teal', 'dirtyviolet', 'wheat', 'deepsalmon', 'lightpink', 'aquamarine', 'paleyellow']
        mainset4_colours = ['limegreen', 'skyblue', 'warmpink', 'limon', 'violet', 'bluewhite', 'greencyan', 'sand']
        mainset5_colours = ['forest', 'lightteal', 'darksalmon', 'splitpea', 'raspberry', 'grey50', 'deepblue', 'brown']
        colours = (mainset1_colours + mainset2_colours + mainset3_colours
                   + mainset4_colours + mainset5_colours)
        ncolours = len(colours)

        # Loop over chains, wrapping around when colours run out
        for i, ch in enumerate(chain_list):
            cmd.color(colours[i % ncolours], ch)
            util.cnc(ch)
def get_PDBChains(self):
    """Store the chain identifiers of the PyMOL object or selection named 'PDB' in self.chains."""
    self.chains = cmd.get_chains('PDB')
# Load every .pdb file of the current directory in turn, show it as cartoon
# and give each chain its own colour.
cmd.set("use_shaders", 1)
colors = ['red', 'green', 'blue', 'yellow', 'violet', 'cyan',
          'salmon', 'lime', 'pink', 'slate', 'magenta', 'orange', 'marine',
          'olive', 'purple', 'teal', 'forest', 'firebrick', 'chocolate',
          'wheat', 'white', 'grey']

for file in os.listdir("."):
    if file.endswith(".pdb"):
        n = os.path.splitext(file)[0]
        cmd.load(file)
        cmd.hide("all")
        cmd.show("cartoon")
        # start at the fifth colour; wrap around so that structures with
        # more chains than remaining colours do not raise IndexError
        i = 4
        for c in cmd.get_chains():
            colour = colors[i % len(colors)]
            if (c and c.strip()):
                print(n + " has chain " + c)
                cmd.color(colour, "chain " + c)
            else:
                # blank chain identifier: colour all chains
                cmd.color(colour, "chain *")
            i += 1
        # NOTE(review): the cmd.png calls below are commented out, so no
        # image is actually written despite this message.
        print("Saving " + file + " as " + n + ".png")
        #cmd.png(n + ".png", 0, 0, 800)
        ###cmd.png(n + ".png", "3cm", "3cm", 600)
        ###cmd.png(n + ".png", 1200, 900, 300, 0, 0) #, 300, 0, 0)
        cmd.delete(n)
#print("saved pngs")
#cmd.quit()
def load_consurf(filename, selection, palette='red_white_blue', quiet=1):
    '''
DESCRIPTION

    Color by evolutionary conservation. Writes scores to b-factor.

    You need a "r4s.res" or "consurf.grades" input file.

USAGE

    load_consurf filename, selection [, palette ]

ARGUMENTS

    filename = string or iterable of lines: path of the score file, or an
    already opened file-like object (which is left open)

    selection = string: atom selection, should span a single chain

    palette = string: color palette; a palette containing a space is
    handed to spectrumany, an empty palette skips coloring
    {default: red_white_blue}

NOTES

    Polymer atoms without a mapped score get b = -10 and are colored
    yellow.

SEE ALSO

    consurfdb
    '''
    import re
    from .seqalign import needle_alignment, alignment_mapping
    from . import one_letter

    # reduced pattern that matches both r4s.res and consurf.grades
    pattern = re.compile(r'\s*(\d+)\s+([A-Y])\s+([-.0-9]+)\s')

    scores = []
    seqlist = []

    # only close the handle if it was opened here
    opened_here = isinstance(filename, basestring)
    if opened_here:
        handle = open(filename)
    else:
        handle = filename

    try:
        if len(cmd.get_chains(selection)) > 1:
            print(' Warning: selection spans multiple chains')

        for line in handle:
            if line.startswith('#') or not line.strip():
                continue
            m = pattern.match(line)
            if m is None:
                continue
            scores.append(float(m.group(3)))
            seqlist.append(m.group(2))
    finally:
        if opened_here:
            handle.close()

    selection = '(%s) and polymer' % selection
    model_ca = cmd.get_model(selection + ' and guide')
    model_seq = ''.join(one_letter.get(a.resn, 'X') for a in model_ca.atom)
    sequence = ''.join(seqlist)

    # map score positions to residue identifiers via sequence alignment
    aln = needle_alignment(model_seq, sequence)
    scores_resi = dict((model_ca.atom[i].resi, scores[j])
            for (i, j) in alignment_mapping(*aln))

    # -10 marks residues without a score
    cmd.alter(selection, 'b=scores.get(resi, -10)',
            space={'scores': scores_resi}, quiet=quiet)

    if palette:
        cmd.color('yellow', selection + ' and b<-9')
        if ' ' in palette:
            from .viewing import spectrumany as spectrum
        else:
            spectrum = cmd.spectrum
        spectrum('b', palette, selection + ' and b>-9.5')