def morph_movie(morph, view, color, base_name, frameno_offset=0, morph_reverse=False):
    """Render every state of a morph as a ray-traced PNG frame.

    The morph is split into one object per state, named "<morph>_0001",
    "<morph>_0002", ... (zero-padded to four digits). Each state object is
    shown through clump_representation, rendered from ``view``, cleaned up
    with clump_cleanup and finally emptied with cmd.remove.

    One frame more than the number of states is written: the state at the
    end of the walk is held for an extra frame.

    Args:
        morph: name of the multi-state object to render.
        view: view handed to cmd.set_view before each frame is rendered.
        color: color forwarded to clump_representation.
        base_name: prefix of the PNG file names; the frame number, zero-padded
            to three digits, is appended.
        frameno_offset: number of the first frame written.
        morph_reverse: if true, walk the states from last to first.

    Returns:
        The frame number following the last frame that was written.
    """
    # The state count of the source object is loop-invariant: ask for it once.
    number_of_states = cmd.count_states(morph)
    cmd.split_states(morph)

    # State shown in each frame; the clamp repeats the final state once.
    steps = range(number_of_states + 1)
    if morph_reverse:
        frame_states = [max(1, number_of_states - step) for step in steps]
    else:
        frame_states = [min(step + 1, number_of_states) for step in steps]

    last_frame = frameno_offset
    for position, statenum in enumerate(frame_states):
        state_name = morph + "_" + str(statenum).zfill(4)
        clump_representation([state_name], color, state_name)
        cmd.set_view(view)
        cmd.png(base_name + str(last_frame).zfill(3), width=1920, height=1080, ray=True)
        clump_cleanup([state_name], state_name)
        # cmd.remove strips the atoms from the state object. Doing that while
        # the next frame still shows the same state left the held frame
        # empty, so the atoms are only removed after the state's last use.
        next_position = position + 1
        reused_next = (next_position < len(frame_states)
                       and frame_states[next_position] == statenum)
        if not reused_next:
            cmd.remove(state_name)
        last_frame += 1
    return last_frame
def mican(mobile, target, option=""):
    '''
DESCRIPTION

    Superpose two selections with the external "mican" program.

    Both selections are written to a temporary directory as PDB files,
    mican is run on them with "-o <tmpdir>/aligned.pdb", the output file is
    loaded as "aligned" and split into states, and the backbone of
    "mobile" is then aligned onto the backbone of "aligned_0001" with
    cmd.align. All temporary objects and selections are deleted afterwards.

ARGUMENTS

    mobile = str: atom selection of mobile object

    target = str: atom selection of target object

    option = str: extra command-line options passed to mican, separated by
    whitespace. "-o" is reserved for the output file. {default: ""}

EXAMPLE

    mican mobile_object, target_object

NOTES

    The "mican" executable must be on PATH (or edit "execute" below to
    hold its full path, e.g. "/usr/bin/mican").
    '''
    import os
    import subprocess
    import tempfile

    # Reject the reserved option before any file is written or process run.
    extra_options = option.split()
    if "-o" in extra_options:
        print("option -o is reserved")
        raise CmdException

    execute = "mican"

    with tempfile.TemporaryDirectory() as dname:
        print("Temporary directory =" + dname)

        tmptarget = os.path.join(dname, "target.pdb")
        tmpmobile = os.path.join(dname, "mobile.pdb")
        tmpout = os.path.join(dname, "aligned.pdb")

        # save pdb for mican
        cmd.save(tmptarget, target)
        cmd.save(tmpmobile, mobile)

        command = [execute, tmpmobile, tmptarget, "-o", tmpout] + extra_options
        proc = subprocess.run(command, stdout=subprocess.PIPE)
        # print result to pymol console
        print(proc.stdout.decode("utf8"))

        # Must happen while the temporary directory still exists.
        cmd.load(tmpout, "aligned")

    try:
        cmd.split_states("aligned")
        cmd.select("mobileback", mobile + " and backbone")
        # cmd.pair_fit("mobileback", "aligned_0001 and backbone") is an
        # alternative if cmd.align does not give a good fit.
        cmd.align("mobileback", "aligned_0001 and backbone")
    finally:
        # Always drop the helper selection/objects, even if alignment fails.
        for leftover in ("mobileback", "aligned", "aligned_0001", "aligned_0002"):
            cmd.delete(leftover)
def export_pymol(file1, file2):
    """Load two SDF files, split both into per-state objects, save a session.

    The object loaded from ``file1`` is renamed to "query_conf". Both
    multi-state objects are split into one object per state and the
    multi-state originals are deleted. The session is written next to
    ``file2`` as "<file2 without .sdf>.pse" and the workspace is cleared.
    """
    def _without_sdf(path):
        # Everything before the last ".sdf" in the path.
        return path.rsplit(".sdf", maxsplit=1)[0]

    query_stem = _without_sdf(file1)
    target_stem = _without_sdf(file2)
    # Name of each loaded object: the last "/"-separated path component.
    query_object = query_stem.split("/")[-1]
    target_object = target_stem.split("/")[-1]

    cmd.load(filename=file1)
    cmd.set_name(query_object, "query_conf")
    cmd.load(filename=file2)

    multistate_objects = ("query_conf", target_object)
    for name in multistate_objects:
        cmd.split_states(object=name)
    for name in multistate_objects:
        cmd.delete(name)

    cmd.save(f"{target_stem}.pse")
    cmd.delete("all")
def split_states_chains_save(sel1):
    '''
    Split the states of an NMR ensemble and save, for each state, the
    complex and chain A separately.

    Waters ("resn hoh") are removed first. For every state two files are
    written: "<prefix>_complex.pdb" with the whole state and "<prefix>.pdb"
    with the "<prefix>_A" object produced by cmd.split_chains. The prefix
    is "<sel1>_NNNN" (four-digit state number) for multi-state objects
    and just "<sel1>" for a single-state object.
    '''
    cmd.remove("resn hoh")

    number_of_states = cmd.count_states(sel1)
    if number_of_states > 1:
        # Splitting is loop-invariant: do it once instead of once per state.
        cmd.split_states(sel1)
        file_prefixes = ["{0}_{1:04d}".format(sel1, ind)
                         for ind in range(1, number_of_states + 1)]
    elif number_of_states == 1:
        file_prefixes = [sel1]
    else:
        file_prefixes = []

    for file_prefix in file_prefixes:
        cmd.save(file_prefix + "_complex.pdb", file_prefix)
        cmd.split_chains(file_prefix)
        cmd.save(file_prefix + ".pdb", file_prefix + "_A")
def get_ligand(ligand_path):
    '''
DESCRIPTION

    Reads in docked poses generated by Vina and saves the first mode
    (highest binding affinity) as "ligand.mol2" in the same directory as
    the input file.

ARGUMENTS

    ligand_path = str: path of the multi-pose file to load. The loaded
    object is expected to be named after the file (base name without
    extension).
    '''
    base = os.path.basename(ligand_path)
    object_name = os.path.splitext(base)[0]

    cmd.load(ligand_path)
    cmd.split_states(object_name)

    # os.path.join keeps a bare file name relative to the working directory;
    # plain concatenation with "/" turned it into "/ligand.mol2".
    out_path = os.path.join(os.path.dirname(ligand_path), "ligand.mol2")
    cmd.save(out_path, object_name + "_0001", -1, 'mol2')
def toGroup(groupName, sel, prefix="", delOrig=True):
    """
    DESCRIPTION
    toGroup will take a multistate object and extract it to a group with
    N objects all in state #1.  It essentially performs the following:

    split_states myObj, prefix=somePrefix
    group newGroup, somePrefix*
    delete myObj

    PARAMETERS:

    groupName (string)
        The name of the group to create

    sel (string)
        The name of the selection/object from which
        to make the group

    prefix (string)
        The prefix of the names of each of split states.
        The state number, zero-padded to four digits, is appended:
        with prefix ''obj'' and states 1 through 100 the objects are
        named obj0001, obj0002, ..., obj0100.
        Defaults to ''<sel>_grouped''.

    delOrig (string/boolean)
        If true then delete the original selection, otherwise not.
        The strings '', '0', 'false', 'no' and 'off' (any case) count
        as false.

    RETURN

    Nothing, it makes a new group.
    """
    if prefix == "":
        prefix = sel + "_grouped"

    # delOrig is documented as string/boolean; a plain truth test would treat
    # every non-empty string (including "0" and "False") as true.
    if isinstance(delOrig, str):
        delOrig = delOrig.strip().lower() not in ("", "0", "false", "no", "off")

    cmd.split_states(sel, prefix=prefix)
    # NOTE: the wildcard also picks up unrelated objects whose names start
    # with the same prefix.
    cmd.group(groupName, prefix + "*")

    if delOrig:
        cmd.delete(sel)
def toGroup(groupName,sel,prefix="",delOrig=True):
    """
    DESCRIPTION
    toGroup will take a multistate object and extract it to a group with
    N objects all in state #1.  It essentially performs the following:

    split_states myObj, prefix=somePrefix
    group newGroup, somePrefix*
    delete myObj

    PARAMETERS:

    groupName (string)
        The name of the group to create

    sel (string)
        The name of the selection/object from which
        to make the group

    prefix (string)
        The prefix of the names of each of split states.
        Each object is named prefix + four-digit state number, e.g.
        for prefix ''obj'' and states 1 through 100:
        obj0001, obj0002, ..., obj0100.
        If empty, ''<sel>_grouped'' is used.

    delOrig (string/boolean)
        If true then delete the original selection, otherwise not.
        String values '', '0', 'false', 'no', 'off' (case-insensitive)
        are treated as false.

    RETURN

    Nothing, it makes a new group.
    """
    if prefix=="":
        prefix=sel + "_grouped"

    # A string such as "0" or "False" is truthy in Python, so string values
    # (the docstring allows them) are interpreted explicitly.
    falsy_words = ("", "0", "false", "no", "off")
    if isinstance(delOrig, str):
        delete_original = delOrig.strip().lower() not in falsy_words
    else:
        delete_original = bool(delOrig)

    cmd.split_states(sel, prefix=prefix)
    cmd.group(groupName,prefix+"*")

    if delete_original:
        cmd.delete(sel)
def extract_state_to_object(morph, state, new_object):
    """Copy one state of a multi-state object into a new object.

    The morph is split into per-state objects "<morph>_NNNN" (state number
    zero-padded to four digits), the requested one is copied to
    ``new_object``, and then all per-state objects as well as the morph
    itself are deleted.

    The interpreter is exited (after printing a message) if ``state`` is
    larger than the number of states, or if ``new_object`` equals ``morph``.
    """
    total_states = cmd.count_states(morph)

    if state > total_states:
        detail = "{} > number of states ({}) available in {}".format(
            state, total_states, morph)
        print("in extract_state_to_object() requested state " + detail)
        exit()

    if new_object == morph:
        detail = "name for extraction (currently bouth '{}') ".format(morph)
        print("in extract_state_to_object() please picked different " + detail)
        exit()

    def split_name(number):
        # Name split_states gives to the object holding state `number`.
        return morph + "_" + str(number).zfill(4)

    cmd.split_states(morph)
    cmd.copy(new_object, split_name(state))

    # Nothing but the copy is kept.
    for number in range(1, total_states + 1):
        cmd.delete(split_name(number))
    cmd.delete(morph)
    return
def test_split_states(self):
    # Build a three-state object: state 1 of the alanine fragment is copied
    # into states 2 and 3 of the same object.
    cmd.fragment('ala', 'm1')
    for target_state in (2, 3):
        cmd.create('m1', 'm1', 1, target_state)

    # Split states 1 through 2 only; no object for state 3 is expected.
    cmd.split_states('m1', 1, 2)

    expected_names = ['m1', 'm1_0001', 'm1_0002']
    self.assertItemsEqual(expected_names, cmd.get_names())
def flatten_obj(name="",selection="",state=0,rename=0,quiet=1,chain_map=""):
    """
DESCRIPTION

    "flatten_obj" combines multiple objects or states into a single object,
    renaming chains where required

USAGE

    flatten_obj name, selection[, state[, rename[, quiet[, chain_map]]]]

ARGUMENTS

    name = a unique name for the flattened object {default: flat}

    selection = the set of objects to include in the flattening. The selection
        will be expanded to include all atoms of objects. {default: all}

    state = the source state to select. Use 0 or -1 to flatten all states {default: 0}

    rename = The scheme to use for renaming chains: {default: 0}
        (0) preserve chains IDs where possible, rename other chains
            alphabetically
        (1) rename all chains alphabetically
        (2) rename chains using the original chain letter, object name, and state

    quiet = If set to 0, print some additional information about progress and
        chain renaming {default: 1}

    chain_map = An attribute name for the 'stored' scratch object. If
        specified, `stored.<chain_map>` will be populated with a dictionary
        mapping the new chain names to a tuple giving the originated object,
        state, and chainID. {default: ""}

NOTES

    Like the select command, if name is omitted then the default object name
    ("flat") is used as the name argument.

    Chain renaming is tricky. PDB files originally limited chains to single
    letter identifiers containing [A-Za-z0-9]. When this was found to be
    limiting, multi-letter chains (ideally < 4 chars) were allowed. This is
    supported as of PyMOL 1.7. Earlier versions do not accept rename=2, and
    will raise an exception when flattening a structure with more than 62
    chains.

EXAMPLES

    flatten_obj flat, nmrObj
    flatten_obj ( obj1 or obj2 )

SEE ALSO

    split_states

    """
    # Single argument; treat it as the selection
    if name and not selection:
        selection = name
        name = ""
    # default name and selection
    if not name:
        name = "flat"
    if not selection:
        selection = "(all)"

    # Arguments arrive as strings when invoked as a PyMOL command.
    state = int(state)
    rename = int(rename)
    quiet = int(quiet)

    # Wrap in extra parentheses for get_object_list
    selection = "( %s )" % selection

    if rename == 0:
        chainSet = DefaultChainSet()
    elif rename == 1:
        chainSet = SequentialChainSet()
    elif rename == 2:
        chainSet = LongChainSet()
    else:
        raise ValueError("Unrecognized rename option (Valid: 0,1,2)")

    metaprefix = "temp" #TODO unique prefix

    # store original value of retain_order, which causes weird interleaving of
    # structures if enabled.
    retain_order = cmd.get("retain_order")
    try:
        cmd.set("retain_order",0)

        # create one temporary single-state object per (object, state)
        for obj in cmd.get_object_list(selection):
            if state <= 0:
                # all states
                prefix = "%s_%s_"%(metaprefix,obj)
                cmd.split_states(obj,prefix=prefix)
            else:
                prefix = "%s_%s_%04d"%(metaprefix,obj,state)
                cmd.create(prefix, obj, state, 1)

        # Matches the temporary object names: <metaprefix>_<object>_<state>.
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        statere = re.compile(r"^%s_(.*)_(\d+)$" % re.escape(metaprefix))

        # Iterate over all objects with metaprefix
        try:
            for obj in cmd.get_object_list("(%s_*)"%(metaprefix) ):
                m = statere.match(obj)
                if m is None:
                    print("Failed to match object %s" %obj)
                    continue
                origobj = m.group(1)
                statenum = int(m.group(2))

                chains = cmd.get_chains(obj)

                rev_chain_map = {} #old -> new, for this obj only
                # shorter chain IDs first, then alphabetically
                for chain in sorted(chains,key=lambda x:(len(x),x)):
                    new_chain = chainSet.map_chain(origobj,statenum,chain)
                    rev_chain_map[chain] = new_chain
                    if not quiet:
                        print(" %s state %d chain %s -> %s"%(origobj,statenum,chain, new_chain) )
                    if not _long_chains:
                        if len(new_chain) > 1:
                            raise OutOfChainsError("No additional chains available (max 62).")

                space = {'rev_chain_map':rev_chain_map}
                cmd.alter(obj,"chain = rev_chain_map[chain]",space=space)

            print("Creating object from %s_*"%metaprefix)
            # Recombine into a single object
            cmd.create(name,"%s_*"%metaprefix)

            # Set chain_map
            if chain_map:
                setattr(stored,chain_map,chainSet)

            # Warn if lowercase chains were generated
            if cmd.get("ignore_case") == "on" and any(c.upper() != c for c in chainSet.keys()):
                print("Warning: using lower-case chain IDs. Consider running the "
                      "following command:\n set ignore_case, 0" )

        finally:
            # Clean up
            print("Cleaning up intermediates")
            cmd.delete("%s_*"%metaprefix)
    finally:
        # restore original parameters
        print("Resetting variables")
        cmd.set("retain_order",retain_order)
def scene_interpolate(view_init_str, object_properties, base_name, number_of_frames=15, frameno_offset=0, view_last_str=None, morph_properties=None):
    """Render a sequence of ray-traced PNG frames with interpolated placement.

    For every frame the objects in ``object_properties`` are positioned by
    object_tfm_interpolate; optionally the camera view is interpolated
    between two views and morph states are stepped through.

    Args:
        view_init_str: view string used for every frame when
            ``view_last_str`` is not given; otherwise the starting view.
        object_properties: dict keyed by object name, forwarded to
            object_tfm_interpolate. An entry "lipid" gets special handling.
        base_name: prefix of the PNG file names; the frame number,
            zero-padded to three digits, is appended.
        number_of_frames: the loop runs number_of_frames + 1 times
            (frame numbers 0..number_of_frames). Replaced by the morph
            length when ``morph_properties`` is given.
        frameno_offset: number of the first frame written.
        view_last_str: final view string; when falsy no view interpolation
            takes place.
        morph_properties: dict mapping a morph object name to a 6-element
            sequence unpacked as [color, reverse flag, tfm_from, tfm_to,
            tfm_reverse, transparency].

    Returns:
        The frame number following the last frame that was written.
    """
    # the hope is to get rid of this some day
    # though it does not seem that it will happen before we move to blender
    global lipid_resnums
    if "lipid" in object_properties.keys():
        style_lipid("lipid")
        props = object_properties["lipid"]
        # the original note says the transparency argument is what matters
        # here; the test is for more than five entries
        # NOTE(review): presumably the sixth entry is the transparency - confirm
        if len(props) > 5:
            # lipid is a special problem because it is represented with sticks
            # and transparency does not work for raytraced sticks
            # collect the fifth whitespace-separated field of every ATOM line
            with open("{}/{}".format(structure_home, structure_filename["lipid"])) as inf:
                for line in inf:
                    if line[:4] != "ATOM": continue
                    lipid_resnums.add(line.split()[4])
    if morph_properties:
        morph_lengths = set()
        # split every morph into per-state objects "<morph>_NNNN"
        for morph_name in morph_properties.keys():
            morph_lengths.add(cmd.count_states(morph_name))
            cmd.split_states(morph_name)
        if len(morph_lengths) > 1:
            print("morphs are all expected to be of the same length; found", morph_lengths)
            exit()
        if len( morph_lengths ) == 0:
            # can this happen? morph_properties should be falsy in that case,
            # so this branch looks unreachable
            morph_properties = None
        else:
            # the morph length dictates the number of frames
            number_of_frames = morph_lengths.pop()
    # I could not find the way to move the surface, if there's one provided
    # so I'm moving the object and re-creating the mesh
    view_init = view_string2view(view_init_str) if view_last_str else None
    view_last = view_string2view(view_last_str) if view_last_str else None
    # get quaternions for interpolation
    qstart = view2quat(view_init) if view_last_str else None
    qend = view2quat(view_last) if view_last_str else None
    for objnm in object_properties.keys():
        cmd.hide("everything", objnm)
        clump_cleanup([objnm], objnm)
    last_frame = frameno_offset
    for frameno in range(0, number_of_frames + 1):
        # object position interpolation
        tmpnames = object_tfm_interpolate(object_properties, number_of_frames, frameno)
        # view interpolation, if called for
        if not view_last_str:
            view = view_init_str
        else:
            view = view2view_string(
                intermediate_view(view_init, view_last, qstart, qend, number_of_frames, frameno))
        # morph
        if morph_properties:
            tmp_obj_props = {}
            for morph_name, props in morph_properties.items():
                [
                    morph_color, morph_reverse, tfm_from, tf_to, tfm_reverse,
                    transparency
                ] = props
                # change shape: pick the state for this frame, clamped to the
                # valid range 1..number of states
                if morph_reverse:
                    stateno = max(1, cmd.count_states(morph_name) - frameno)
                else:
                    stateno = min(frameno + 1, cmd.count_states(morph_name))
                morph_state_name = morph_name + "_" + str(stateno).zfill(4)
                # original layout note: [identity_tfm, gbg_tfm, tfm_reverse,
                # color, small_molecule rep]; transparency is appended here
                tmp_obj_props[morph_state_name] = [
                    tfm_from, tf_to, tfm_reverse, morph_color, False,
                    transparency
                ]
            # reposition all morphs and show them as clumps
            tmpnames.extend(
                object_tfm_interpolate(tmp_obj_props, number_of_frames, frameno))
        # some of the ops above move the camera, so the view is set last
        cmd.set_view(view)
        cmd.png(base_name + str(last_frame).zfill(3), width=1920, height=1080, ray=True)
        object_cleanup(tmpnames)
        last_frame += 1
    return last_frame
def visual(self, uno):
    """Load the PDB counterpart of a .dlg file into a fresh PyMOL session.

    ``uno`` is a file name in which ".dlg" is replaced by ".pdb"; the file is
    read from ``self.dirct``, loaded as object "COMP" and split into one
    object per state.
    """
    cmd.reinitialize()
    pdb_name = uno.replace(".dlg", ".pdb")
    pdb_path = self.dirct + pdb_name
    cmd.load(pdb_path, "COMP")
    cmd.split_states("COMP")
# NOTE: if PyMOL is not importable, add its site-packages directory to
# sys.path before the imports below.
import os
import sys

import pymol
from pymol import cmd

pymol.finish_launching()

if len(sys.argv) != 6:
    # The program name was never substituted into the "%s" placeholder.
    print("Usage: %s pdb-file align-sele save-sele align-atoms frame-pdb" % sys.argv[0])
    sys.exit(1)

# Load the structure and keep only its first state, under the original name.
struct_path = sys.argv[1]
struct_name = os.path.splitext(os.path.basename(struct_path))[0]
cmd.load(struct_path, struct_name)
cmd.split_states(struct_name, 1, 1, "temp")  # creates object "temp0001"
cmd.delete(struct_name)
cmd.set_name("temp0001", struct_name)

# Reference frame structure.
frame_bname = "frame"
frame_path = sys.argv[5]
cmd.load(frame_path, frame_bname)

# Selections supplied on the command line.
target_sele = sys.argv[2]
user_sele = sys.argv[3]
def split_alignment(object, object1=None, object2=None, delimiter="_"):
    """DESCRIPTION

    Splits a two-state object into two separate objects.

USAGE

    split_alignment object[, object1, object2][, delimiter]

ARGUMENTS

    object      Two-state input object
    object1     What to name the first state from the object [optional]
    object2     What to name the second state from the object [optional]
    delimiter   Delimiter which separates object1 and object2 in the object
                name. See DETAILS. [default '_']

DETAILS

    The input object must contain at least two states. Additional states are
    ignored.

    If object1 and/or object2 are omitted, the script will attempt to
    generate the missing name(s) based on the input object's name. The name
    is split around the first instance of <delimiter>. Thus, objects which
    follow the convention "object1_object2" will be properly split without
    additional arguments. If the delimiter does not occur in the name,
    "<object>_0001" and "<object>_0002" are used and a warning is printed.

EXAMPLES

    # Results in objects '1AX8.A' and '3PIV.A'
    split_alignment 1AX8.A_3PIV.A

    # Results in objects 'query' and 'target'
    split_alignment alignment, query, target

    # Results in objects '1AX8.A' and '3PIV.A'
    split_alignment 1AX8.A_vs_3PIV.A, delimiter="_vs_"

    """
    # check that we have at least two states
    if cmd.count_states(object) < 2:
        print("Error: input object must contain at least two states.")
        return

    prefix = "split%04d_" % random.randint(0, 9999)  # make unique

    # guess whichever output names were not supplied
    if object1 is None or object2 is None:
        delimiter_found = True
        try:
            d = object.index(delimiter)
            guess1 = object[:d]
            guess2 = object[d + len(delimiter):]
        except (ValueError, TypeError):
            # delimiter absent (or unusable): fall back to numbered names
            delimiter_found = False
            guess1 = "%s_%04d" % (object, 1)
            guess2 = "%s_%04d" % (object, 2)
        if object1 is None:
            object1 = guess1
        if object2 is None:
            object2 = guess2
        if not delimiter_found:
            print("Warning: '%s' not found in '%s'. Using names %s and %s."
                  % (delimiter, object, object1, object2))

    # split them
    cmd.split_states(object, prefix=prefix)

    # rename to output names
    cmd.set_name("%s%04d" % (prefix, 1), object1)
    cmd.set_name("%s%04d" % (prefix, 2), object2)

    # delete other states
    for o in cmd.get_names("objects"):
        if o.startswith(prefix):
            cmd.delete(o)
def flatten_obj(name="", selection="", state=0, rename=0, quiet=1, chain_map=""):
    """
DESCRIPTION

    "flatten_obj" combines multiple objects or states into a single object,
    renaming chains where required

USAGE

    flatten_obj name, selection[, state[, rename[, quiet[, chain_map]]]]

ARGUMENTS

    name = a unique name for the flattened object {default: flat}

    selection = the set of objects to include in the flattening. The selection
        will be expanded to include all atoms of objects. {default: all}

    state = the source state to select. Use 0 or -1 to flatten all states {default: 0}

    rename = The scheme to use for renaming chains: {default: 0}
        (0) preserve chains IDs where possible, rename other chains
            alphabetically
        (1) rename all chains alphabetically
        (2) rename chains using the original chain letter, object name, and state

    quiet = If set to 0, print some additional information about progress and
        chain renaming {default: 1}

    chain_map = An attribute name for the 'stored' scratch object. If
        specified, `stored.<chain_map>` will be populated with a dictionary
        mapping the new chain names to a tuple giving the originated object,
        state, and chainID. {default: ""}

NOTES

    Like the select command, if name is omitted then the default object name
    ("flat") is used as the name argument.

    Chain renaming is tricky. PDB files originally limited chains to single
    letter identifiers containing [A-Za-z0-9]. When this was found to be
    limiting, multi-letter chains (ideally < 4 chars) were allowed. This is
    supported as of PyMOL 1.7. Earlier versions do not accept rename=2, and
    will raise an exception when flattening a structure with more than 62
    chains.

EXAMPLES

    flatten_obj flat, nmrObj
    flatten_obj ( obj1 or obj2 )

SEE ALSO

    split_states

    """
    # Single argument; treat it as the selection
    if name and not selection:
        selection = name
        name = ""
    # default name and selection
    if not name:
        name = "flat"
    if not selection:
        selection = "(all)"

    # Arguments arrive as strings when invoked as a PyMOL command.
    state = int(state)
    rename = int(rename)
    quiet = int(quiet)

    # Wrap in extra parentheses for get_object_list
    selection = "( %s )" % selection

    if rename == 0:
        chainSet = DefaultChainSet()
    elif rename == 1:
        chainSet = SequentialChainSet()
    elif rename == 2:
        chainSet = LongChainSet()
    else:
        raise ValueError("Unrecognized rename option (Valid: 0,1,2)")

    metaprefix = "temp"  #TODO unique prefix

    # store original value of retain_order, which causes weird interleaving of
    # structures if enabled.
    retain_order = cmd.get("retain_order")
    try:
        cmd.set("retain_order", 0)

        # create one temporary single-state object per (object, state)
        for obj in cmd.get_object_list(selection):
            if state <= 0:
                # all states
                prefix = "%s_%s_" % (metaprefix, obj)
                cmd.split_states(obj, prefix=prefix)
            else:
                prefix = "%s_%s_%04d" % (metaprefix, obj, state)
                cmd.create(prefix, obj, state, 1)

        # Matches the temporary object names: <metaprefix>_<object>_<state>.
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        statere = re.compile(r"^%s_(.*)_(\d+)$" % re.escape(metaprefix))

        # Iterate over all objects with metaprefix
        try:
            for obj in cmd.get_object_list("(%s_*)" % (metaprefix)):
                m = statere.match(obj)
                if m is None:
                    print("Failed to match object %s" % obj)
                    continue
                origobj = m.group(1)
                statenum = int(m.group(2))

                chains = cmd.get_chains(obj)

                rev_chain_map = {}  #old -> new, for this obj only
                # shorter chain IDs first, then alphabetically
                for chain in sorted(chains, key=lambda x: (len(x), x)):
                    new_chain = chainSet.map_chain(origobj, statenum, chain)
                    rev_chain_map[chain] = new_chain
                    if not quiet:
                        print(" %s state %d chain %s -> %s" %
                              (origobj, statenum, chain, new_chain))
                    if not _long_chains:
                        if len(new_chain) > 1:
                            raise OutOfChainsError(
                                "No additional chains available (max 62).")

                space = {'rev_chain_map': rev_chain_map}
                cmd.alter(obj, "chain = rev_chain_map[chain]", space=space)

            print("Creating object from %s_*" % metaprefix)
            # Recombine into a single object
            cmd.create(name, "%s_*" % metaprefix)

            # Set chain_map
            if chain_map:
                setattr(stored, chain_map, chainSet)

            # Warn if lowercase chains were generated
            if cmd.get("ignore_case") == "on" and any(
                    c.upper() != c for c in chainSet.keys()):
                print(
                    "Warning: using lower-case chain IDs. Consider running the "
                    "following command:\n set ignore_case, 0")

        finally:
            # Clean up
            print("Cleaning up intermediates")
            cmd.delete("%s_*" % metaprefix)
    finally:
        # restore original parameters
        print("Resetting variables")
        cmd.set("retain_order", retain_order)
'--hetatm', help= 'force all the atom to be defined as HETATM type. The HETATM atom have CONECT defined by default.', action='store_true') parser.add_argument('--addh', help='Add hydrogens', action='store_true') parser.add_argument('--select', help='Keep only the selected atoms', default='all') args = parser.parse_args() cmd.load(args.inp, 'inmol') if args.addh: cmd.h_add('all') if args.hetatm: cmd.alter('all', 'type="HETATM"') cmd.split_states('inmol') cmd.remove('inmol') basename = os.path.basename(os.path.splitext(args.inp)[0]) if args.format is None: outfmt = os.path.splitext(args.inp)[1][1:] else: outfmt = args.format all_objects = cmd.get_object_list() try: os.mkdir(args.outdir) except FileExistsError: pass for i, obj in enumerate(all_objects): sys.stdout.write(f'{i+1}/{len(all_objects)} saving {obj}\r') cmd.save(f'{args.outdir}/{basename}_{i:04d}.{outfmt}', f'{obj} and {args.select}')
if not os.path.exists(pdbpath): pdbpath = os.path.join(pbdir, 'final.pdb') if not os.path.exists(pdbpath) or not os.path.exists(cypath): raise SystemExit # does not seem like a ponderosa result directory f = open(cypath, 'r') ordered = f.read() f.close() oreg = ordered.replace(',', '+') # to alleviate a small bug. ordered = oreg.replace('+', ', ') # more common in papers from pymol import cmd cmd.load(pdbpath, 'for_rmsd') cmd.split_states('for_rmsd') # Backbone bb_rmsds = [] for i in range(2, 21): fit_result = cmd.align('for_rmsd_0001 & i. %s & n. N+CA+C+O' % (oreg), 'for_rmsd_%04d' % (i), cutoff=0) bb_rmsds += [ fit_result[0], ] # All heavy atoms ha_rmsds = [] for i in range(2, 21): fit_result = cmd.align('for_rmsd_0001 & i. %s & n. N*+C*+O*+S*' % (oreg),
in_msg = 'Select the sampling density from 1-4. \n\ Higher density means higher accuracy but slower performance.' density = int(s.show_inputdialog('Select the Sampling density', in_msg, '2')) if density < 1 or density > 4: s.show_message('Error', 'The sampling density is invalid.') raise SystemExit in_msg = 'Select the model number. \n\ Use 1 if there\'s only one model. Use 0 for all models.' model = int(s.show_inputdialog('Select the model', in_msg, '1')) cmd.delete('all') cmd.load(pdbpath, 'for_area') cmd.split_states('for_area') cmd.set('dot_solvent', 1) cmd.set('dot_density', density) if model != 0: # for a specific model try: area = cmd.get_area('resi ' + selection + ' and model for_area_' + '{:04d}'.format(model)) except: s.show_message('Error', 'The model number was wrong.') raise SystemExit print(area) message = 'The surface area for residue ' + selection + ' is ' + \ '{:.3f}'.format(area) + ' Angstroms^2'
from pymol import cmd

# ---------------- user settings: change as needed ----------------
# atoms to be removed from rotamer library
delete_atoms = ['N21', 'C26', 'H27', 'H28', 'H29']
# three-letter residue name specified in -n for molfile_to_params
three_letter_code = 'NIR'
# atoms around the lysine-ligand bond for alignment and visualisation
atoms_to_align = ['CL', 'CL1', 'CL2']
# ------------------------------------------------------------------

# Start from an empty session, load the conformer ensemble and turn every
# state into its own object ("Molecule_Name_" + state number).
cmd.delete('*')
cmd.load('conformers.mol2')
target = 'conformers'
cmd.split_states(target, prefix="Molecule_Name_")
cmd.delete(target)

# Rename the residue in every conformer; the first conformer becomes the
# alignment target.
counter = 0
for counter, object in enumerate(cmd.get_object_list('(all)'), start=1):
    cmd.alter(object, 'resn="{}"'.format(three_letter_code))
    if counter == 1:
        target_object = object
        target_atom1, target_atom2, target_atom3 = [
            target_object + ' and name ' + atoms_to_align[position]
            for position in range(3)
        ]
def mutate(
        self, pdbid: str,
        replace_with: Dict[int, Optional[str]]) -> Union[List[str], rdchem.Mol]:
    """Modify amino acid residues at the defined positions.

    The structure is loaded from ``<rootdir>/<pdbid>/<pdbid>.pdb`` (fetched
    first when that file does not exist). Positions larger than the number
    of residues are dropped from ``replace_with`` with a printed notice.
    The result is written to (and, if already present, read back from) a
    file under ``<rootdir>/<pdbid>/`` whose name encodes the mutations; the
    file is removed again at the end unless ``self.cache`` is set.

    NOTE(review): ``replace_with`` is modified in place (entries are popped
    and values are normalised) - callers see these changes.

    Params:
    -------
    pdbid: str
        PDB ID associated with the structure.

    replace_with: dict
        The index location(s) within the full protein to replace certain
        residue(s) with. If a residue associated with an index location is
        None, then the modified residue is chosen randomly. If a certain
        index exceeds the number of available residues in the protein,
        then those entries are simply ignored and the user is notified.

    Returns:
    --------
    protein: list of str or rdkit.Chem.rdchem.Mol
        Modified protein with residues. If self.fmt == "primary", then a
        list of strings (one-letter residue codes) is returned. If
        self.fmt == "tertiary", then the 3D molecule structure is returned.

    Raises:
    -------
    DownloadError
        If the structure cannot be fetched.
    ValueError
        If a replacement residue is not accepted by is_aa.
    NotImplementedError
        If self.fmt is neither "primary" nor "tertiary".
    """
    # Load PDB structure (download, if necessary)
    pdb_dir = maybe_create_dir(os.path.join(self.rootdir, pdbid))
    pdb_file = os.path.join(pdb_dir, f"{pdbid}.pdb")
    if not os.path.exists(pdb_file):
        is_successful = cmd.fetch(pdbid,
                                  name=pdbid,
                                  state=1,
                                  type="pdb",
                                  path=pdb_dir)
        if is_successful == -1:
            raise DownloadError(f"Unable to download '{pdbid}'.")
    else:
        cmd.load(pdb_file, object=pdbid, state=1, format="pdb")

    # Get all residue names (one per C-alpha atom),
    # see: https://pymolwiki.org/index.php/List_Selection
    resnames_dict = {"names": []}
    cmd.iterate("(name ca)", "names.append(resn)", space=resnames_dict)
    residue_names = resnames_dict["names"]
    num_residues = len(residue_names)

    # Cleanup idxs: remove indices that exceed number of available residues
    # NOTE(review): indices below 1 are not filtered out - confirm whether
    # they can occur.
    nonvalid_idxs = [
        idx for idx in replace_with.keys() if idx > num_residues
    ]
    for idx in nonvalid_idxs:
        print(
            f"OutOfRange: Removing idx {idx} (only {num_residues} residues)."
        )
        replace_with.pop(idx)

    # Randomly choose an amino acid (AA) to replace residue, if None is provided.
    # Additionally, format string such that it is a valid 3 letter amino acid.
    # Only values are reassigned while iterating; the key set stays the same.
    for idx, residue in replace_with.items():
        if residue is None:
            replace_with[idx] = np.random.choice(aa3)
        elif is_aa(residue):
            residue = residue.upper()
            if len(residue) == 1:
                replace_with[idx] = one_to_three.get(residue)
            elif len(residue) == 3:
                replace_with[idx] = residue
        else:
            raise ValueError(
                f"Invalid residue '{residue}'. Choose one from "
                f"the following {aa1+aa3}.")

    # Determine save filepath name: "<fmt>_<idx><one-letter code>:..."
    modified_res_str = ":".join(
        [f"{k}{three_to_one.get(v)}" for k, v in replace_with.items()])
    filename = f"{self.fmt}_{modified_res_str}"
    filename += ".pdb" if self.fmt == "tertiary" else ".json"
    save_filepath = os.path.join(self.rootdir, pdbid, filename)

    # Replace primary structure, i.e. residue names (str)
    if self.fmt == "primary":
        # Load data from cache, if it exists
        protein = None
        if os.path.exists(save_filepath):
            with open(save_filepath) as json_file:
                protein = json.load(json_file)

        if protein is None:
            for idx, residue in replace_with.items():
                residue_names[
                    idx - 1] = residue  # since PDB starts with idx of 1

            protein = [three_to_one.get(name) for name in residue_names]

            # Save sequence temporarily
            # NOTE(review): maybe_create_dir receives the file path here, not
            # its directory - confirm the helper handles that.
            _ = maybe_create_dir(save_filepath)
            with open(save_filepath, "w") as outfile:
                json.dump(protein, outfile)

    # Replace tertiary structure, i.e. residue's 3D coordinates
    elif self.fmt == "tertiary":
        if not os.path.exists(save_filepath):
            # Split states so that we can optimize only on specific state(s).
            # NOTE: Might be useful to choose lowest energy state to mutate,
            # OR mutate rotamers for all positions, then choose one with
            # lowest energy.
            cmd.split_states(object=pdbid)

            # Delete all other objects other than one we want to mutate
            # NOTE: For now, keep only first object. This might change
            # depending on which state needs to be kept.
            objs = cmd.get_object_list()  # aka states
            keep_objs = [pdbid + "_0001"]
            for obj in objs:
                if obj not in keep_objs:
                    cmd.delete(obj)
            assert keep_objs == cmd.get_object_list()

            # Mutate residues
            cmd.wizard("mutagenesis")
            wizard: Mutagenesis = cmd.get_wizard()
            for idx, res in replace_with.items():
                # the selection is restricted to chain A
                selection = "{0:s}//A/{1:d}/".format(keep_objs[0], idx)
                wizard.do_select(
                    selection)  # select which residue index to replace
                wizard.set_mode(
                    res)  # choose name of residue to replace with
                wizard.do_state(
                    1
                )  # select rotamer with least strain (aka conflicts w/ other atoms)
                wizard.apply()  # apply point mutation
            cmd.set_wizard(None)  # close wizard

            # Save PDB temporarily
            # NOTE(review): the object named pdbid was deleted above (only
            # "<pdbid>_0001" is kept), yet the save selects pdbid - confirm
            # this writes the mutated structure.
            _ = maybe_create_dir(save_filepath)
            cmd.save(save_filepath, selection=pdbid, format="pdb")
            cmd.delete("all")  # remove all objects, clears workspace

        # Load + choose model/structure with lowest energy
        # NOTE: If sanitize=True, the function checks if Mol has the correct
        # hybridization/valance structure (aka is it chemically reasonable).
        # When converting from the PDB block, this sometimes results in
        # improper parsing. Instead, for now, we just check if the Mol is
        # syntactically valid (i.e. all rings/branches closed, no illegal
        # atom types, etc).
        protein = rdmolfiles.MolFromPDBFile(save_filepath,
                                            sanitize=False,
                                            removeHs=False)
        if protein.GetNumConformers() > 1:
            protein = _get_conformer(protein, conformer="min", algo="MMFF")
    else:
        raise NotImplementedError

    # Remove file, if not needed
    if not self.cache:
        os.remove(save_filepath)

    return protein