def get_disulfide(self, selection, molid):
    """
    Decides whether a residue is a cysteine taking part in a disulfide
    bond and, if so, produces the naming needed to treat it as CYX.

    The residue is matched against the known "CYX" residue graph with a
    subgraph isomorphism, since the residue in the system carries extra
    bonded atoms that the template does not.

    Args:
        selection (VMD atomsel): Selection to check
        molid (int): VMD molecule ID to look for the partner atom in

    Returns:
        resnames (dict int -> str): Atom index to residue name
        atomnames (dict int -> str): Atom index to atom name
        conect (int): VMD "residue" value of the sulfur-bearing partner
        All three are None if the selection is not a disulfide cysteine.

    Raises:
        DabbleError: if no CYX residue is defined, or if none of the
            externally bonded atoms is a sulfur
    """
    res_graph, _ = self.parse_vmd_graph(selection)

    # Nothing can be matched without a CYX definition
    if not self.known_res.get("CYX"):
        raise DabbleError("CYX undefined. Check forcefields!")

    # A disulfide-bonded cysteine has exactly 3 atoms bonded from outside
    externs = self.get_extraresidue_atoms(selection)
    if len(externs) != 3:
        return (None, None, None)

    # The CYX template should be a subgraph of this residue
    template = self.known_res.get("CYX")
    matcher = isomorphism.GraphMatcher(res_graph, template,
                                       node_match=self._check_atom_match)
    if not matcher.subgraph_is_isomorphic():
        return (None, None, None)
    mapping = next(matcher.match())

    # Only atoms that the template marks as its own get renamed
    own = [idx for idx in mapping
           if template.node[mapping[idx]].get("residue") == "self"]
    nammatch = {idx: template.node[mapping[idx]].get("atomname")
                for idx in own}
    resmatch = {idx: template.node[mapping[idx]].get("resname")
                for idx in own}

    # The partner cysteine is reached through an externally bonded sulfur
    partners = []
    for idx in externs:
        element = atomsel("index %d" % idx, molid=molid).get("element")[0]
        if element == "S":
            partners.append(idx)
    if not partners:
        raise DabbleError("3 bonded Cys %d isn't a valid disulfide!"
                          % selection.get('resid')[0])

    other = atomsel("index %d" % partners[0], molid=molid)
    return (resmatch, nammatch, other.get("residue")[0])
def get_lipid_tails(self, selection, head):
    """
    Builds name mappings for both tails of a lipid, given the lipid
    selection and the atom indices that make up its head group.

    The head atoms are removed from the lipid's graph; the two connected
    components that remain are each matched against the known tail
    residues.

    Args:
        selection (VMD atomsel): Selection describing the whole lipid
        head (list of int): Atom indices in the head group of this lipid.
            Obtain with the get_lipid_head function.

    Returns:
        (list of tuples of dict int->str): For each tail, the atom index
            to residue name and atom index to atom name dictionaries

    Raises:
        DabbleError: if removing the head does not leave exactly two
            pieces, or if a piece matches no known tail
    """
    resname = selection.get('resname')[0]
    lipid_graph = self.parse_vmd_graph(selection)[0]
    lipid_graph.remove_nodes_from(head)

    if nx.number_connected_components(lipid_graph) != 2:
        raise DabbleError("Incorrect number of tails attached to %s:%s!"
                          % (resname, selection.get('resid')[0]))

    # Only tail names that actually have a residue definition are tried
    candidates = [name for name in self.lipid_tails
                  if self.known_res.get(name)]

    taildicts = []
    for tail in nx.connected_component_subgraphs(lipid_graph, copy=True):
        for name in candidates:
            template = self.known_res.get(name)

            # Match against the template without its neighbor-residue atoms
            own_only = nx.Graph(template)
            own_only.remove_nodes_from(
                [n for n in template.nodes()
                 if template.node[n]["residue"] != "self"])

            matcher = isomorphism.GraphMatcher(
                tail, own_only, node_match=self._check_atom_match)
            if not matcher.is_isomorphic():
                continue

            mapping = next(matcher.match())
            own = [idx for idx in mapping
                   if template.node[mapping[idx]].get("residue") == "self"]
            nammatch = {idx: template.node[mapping[idx]].get("atomname")
                        for idx in own}
            resmatch = {idx: template.node[mapping[idx]].get("resname")
                        for idx in own}
            taildicts.append((resmatch, nammatch))
            break
        else:
            # No candidate was isomorphic to this tail
            raise DabbleError("Couldn't find a match for tail %s:%s"
                              % (resname, selection.get('resid')[0]))

    return taildicts
def get_names(self, selection, print_warning=False):
    """
    Produces the residue name and atom name mapping for a selection.

    Delegates to the parent matcher, then collapses its per-atom residue
    name dictionary into a single residue name, insisting that every atom
    agreed on it.

    Args:
        selection (VMD atomsel): Selection to rename
        print_warning (bool): Passed through to the parent matcher

    Returns:
        (str): Residue name matched, or None if nothing matched
        (dict int->str): Atom index to atom name, or None if nothing
            matched

    Raises:
        DabbleError: if more than one residue name was matched
    """
    resnames, atomnames = super(CharmmMatcher, self).get_names(
        selection, print_warning)
    if not resnames:
        return (None, None)

    # Every atom must have been assigned the same residue name
    unique_names = set(resnames.values())
    if len(unique_names) <= 1:
        return (unique_names.pop(), atomnames)

    raise DabbleError(
        "More than one residue name was returned as "
        "belonging to a single residue in CHARMM matching."
        " Not sure how this happened; something is really "
        "really wrong. Residue was: %s:%d"
        % (selection.get("resname")[0], selection.get("resid")[0]))
def _find_convertible_water_molecule(
        molid,  # pylint: disable=invalid-name
        water_sel='resname TIP3',
        min_ion_dist=5.0):
    """
    Finds a water molecule that can be converted to an ion.

    Candidates are heavy atoms of waters with beta == 1 that are not
    within min_ion_dist (periodic-aware) of any beta == 1 non-water atom.
    One candidate is picked at random.

    Args:
        molid (int): VMD molid to look at
        water_sel (str): VMD atom selection for water
        min_ion_dist (float): Minimum distance between ions

    Returns:
        (int) Atom index of a water oxygen that is convertible

    Raises:
        DabbleError: if no convertible water molecules are found
    """
    inclusion_sel = 'beta 1 and noh and (%s)' % water_sel
    exclusion_sel = 'beta 1 and not (%s)' % water_sel
    sel_str = '(%s) and not pbwithin %f of (%s)' \
              % (inclusion_sel, min_ion_dist, exclusion_sel)
    candidates = atomsel(sel_str, molid).get("index")

    if not candidates:
        # Report the selection that came up empty, not the empty result
        raise DabbleError("No convertible water molecules found in %s"
                          % sel_str)

    # random.randint is inclusive on both ends, so indexing with
    # randint(0, len) could run one past the end; choice cannot.
    return random.choice(candidates)
def _assign_elements(self, graph):
    """
    Fills in the "element" attribute of every node in a residue graph.

    Nodes whose "residue" attribute is "self" are looked up by their
    "type" attribute. All other nodes (atoms from neighboring residues)
    are looked up by their node name with digits, "+" and "-" stripped.
    The lookup table is self.nodenames.

    Args:
        graph (networkx graph): The graph to assign elements to

    Raises:
        DabbleError: if the lookup key has no entry in self.nodenames
    """
    for name, attrs in graph.nodes(data=True):
        if attrs.get('residue') == "self":
            typestr = attrs.get('type')
        else:
            # Neighbor atoms are only defined by name, e.g. "+N" or "-C"
            typestr = ''.join(
                ch for ch in name
                if not (ch.isdigit() or ch in ("+", "-")))

        element = self.nodenames.get(typestr)
        if not element:
            raise DabbleError("Unknown atom type %s, name '%s'"
                              % (typestr, name))
        attrs['element'] = element
def check_out_type(value, forcefield, hmr=False):
    """
    Works out the output format from the requested file name and checks
    that it is compatible with the force field and HMR options.

    Args:
        value (str): Filename requested
        forcefield (str): Force field requested
        hmr (bool): If hydrogen mass repartitioning is requested

    Returns:
        (str): One of 'mae', 'pdb', 'dms', 'charmm' or 'amber'

    Raises:
        DabbleError: if the extension is unsupported with this force
            field, if HMR is requested for a non-amber output, or if the
            amber force field is requested without AMBERHOME set
    """
    ext = value.rsplit('.')[-1]

    if ext in ('mae', 'pdb', 'dms'):
        # These formats are named after their own extension
        out_fmt = ext
    elif ext == 'psf' and "charmm" in forcefield:
        out_fmt = 'charmm'
    elif ext == 'prmtop' and forcefield in [
            "amber", "charmm", "charmm36", "charmm36m"
    ]:
        out_fmt = 'amber'
    else:
        raise DabbleError("%s is an unsupported format with %s forcefield"
                          % (value, forcefield))

    if hmr and out_fmt != 'amber':
        raise DabbleError("HMR only supported with AMBER outputs!")

    # The amber force field needs an AMBER installation to be located
    if forcefield == "amber" and not os.environ.get("AMBERHOME"):
        raise DabbleError("AMBERHOME must be set to use AMBER forcefields!")

    return out_fmt
def _rename_atoms_amber(self):
    """
    Renames atoms and residues of everything outside the lipid selection
    so that they agree with the loaded topologies.

    Each residue is first matched as a standalone residue. If that fails
    it is tried as a residue with an extra covalent linkage. Afterwards
    the user field of all atoms is set to 1.0.

    Returns:
        (set): The third value returned by the matcher's get_linkage for
            each residue that needed the linkage fallback

    Raises:
        DabbleError: if a residue definition could not be found. The
            residue graph is dumped to "rgraph.dot" first.
    """
    pending = set(
        atomsel("not (%s)" % self.lipid_sel,
                molid=self.molid).get("residue"))
    total = len(pending)
    conect = set()  # Atom indices bound to noncanonical residues

    while pending:
        # Progress output every 500 residues
        if len(pending) % 500 == 0:
            percent = 100. - 100 * len(pending) / float(total)
            sys.stdout.write("Renaming residues.... %.0f%% \r" % percent)
            sys.stdout.flush()

        residue = pending.pop()
        sel = atomsel("residue %s" % residue)
        resnames, atomnames = self.matcher.get_names(sel,
                                                     print_warning=False)

        # Fall back to checking for a linkage to another residue
        if not resnames:
            resnames, atomnames, other = self.matcher.get_linkage(
                sel, self.molid)
            if not resnames:
                rgraph = self.matcher.parse_vmd_graph(sel)[0]
                write_dot(rgraph, "rgraph.dot")
                raise DabbleError(
                    "ERROR: Could not find a residue definition "
                    "for %s:%s" % (sel.get("resname")[0],
                                   sel.get("resid")[0]))
            print("\tBonded residue: %s:%d -> %s"
                  % (sel.get("resname")[0], sel.get("resid")[0],
                     list(resnames.values())[0]))
            conect.add(other)

        self._apply_naming_dictionary(resnames, atomnames)

    atomsel('all').set('user', 1.0)
    sys.stdout.write("\n")
    return conect
def load_solute(filename, tmp_dir):
    """
    Loads a molecule input file, picking the format from the extension.

    mae, dms and mol2 files are loaded directly. A pdb file is loaded,
    written back out as a temporary mae file in tmp_dir, and the mae
    version is what ends up loaded.

    Args:
        filename (str): Filename to load
        tmp_dir (str): Directory to put temporary files in

    Returns:
        (int) VMD molecule ID that was loaded

    Raises:
        DabbleError: if the file name is shorter than 3 characters or the
            extension is unsupported
    """
    if len(filename) < 3:
        raise DabbleError("Cannot determine filetype of input file '%s'"
                          % filename)

    ext = filename.split(".")[-1]

    # Formats VMD can take as-is are named after their extension
    if ext in ('mae', 'dms', 'mol2'):
        return molecule.load(ext, filename)

    if ext != 'pdb':
        raise DabbleError("Filetype '%s' currently unsupported "
                          "for input protein" % ext)

    # Need to convert to MAE so concatenation will work later
    temp_mae = tempfile.mkstemp(suffix='.mae', prefix='dabble_input',
                                dir=tmp_dir)[1]
    pdb_id = molecule.load('pdb', filename)
    atomsel('all').write('mae', temp_mae)
    molecule.delete(pdb_id)
    return molecule.load('mae', temp_mae)
def _remove_xy_lipids(self, molid):
    """
    Removes residues in the +-XY direction in the system. Used to chop off
    lipids that are protruding outside of the box dimensions.

    A lipid with atoms outside the box is only removed if the center of
    its heavy atoms is also outside the box. Non-lipid, non-solute
    residues with heavy atoms outside the box are removed as well.

    Args:
        molid (int): VMD molecule id to use

    Returns:
        (int) the value returned by _remove_residues for the deletion

    Raises:
        DabbleError: if a suspicious lipid residue has no heavy atoms
    """
    # Select residues that are outside the box
    half_x_size = self.size[0] / 2.0
    half_y_size = self.size[1] / 2.0
    box_sel_str = 'abs(x) > %f or abs(y) > %f' % (half_x_size, half_y_size)

    # Identify lipids that have some part outside of the box
    suspicious_lipid_residues = list(set(
        atomsel('(%s) and (%s)' % (self.opts['lipid_sel'], box_sel_str),
                molid=molid).get('residue')))

    # Delete lipids whose center is too far out of the box, keep others
    bad_lipids = []
    for i in suspicious_lipid_residues:
        lipid_center = atomsel('noh and residue %s' % str(i),
                               molid=molid).center()
        # Sanity check
        if not len(lipid_center):
            raise DabbleError(
                "No heavy atoms found in suspicious residue %s. "
                "Check your input file." % str(i))
        if abs(lipid_center[0]) > half_x_size or \
           abs(lipid_center[1]) > half_y_size:
            bad_lipids.append(i)

    # A bare "residue" keyword with no numbers is not a usable selection,
    # so fall back to "none" when no lipid was flagged for removal.
    if bad_lipids:
        lipid_headgroup_sel = 'residue ' + ' '.join(
            str(l) for l in bad_lipids)
    else:
        lipid_headgroup_sel = 'none'

    # Do the deletion
    removal_sel_str = '(%s) or not (%s)' % (lipid_headgroup_sel,
                                            self.opts['lipid_sel'])
    total = _remove_residues(
        'noh and (%s) and (%s) and not (%s)'
        % (box_sel_str, removal_sel_str, self.solute_sel),
        molid=molid)
    return total
def convert_ions(self, salt_conc, cation, molid):
    """
    Adds salt to the system by converting water molecules into ions,
    using the ion counts computed by molutils.get_num_salt_ions_needed.

    Args:
        salt_conc (float): Desired salt concentration in M
        cation (str): Cation to add, either Na or K
        molid (int): VMD molecule id to consider

    Returns:
        (int) number of ions added

    Raises:
        DabbleError: if the configured cation is not Na or K
    """
    # NOTE(review): the validation and the first message read the cation
    # from self.opts, while the conversion uses the `cation` argument.
    # Confirm callers always pass the same value for both.
    configured = self.opts.get('cation')
    if configured not in ['Na', 'K']:
        raise DabbleError("Invalid cation '%s'" % configured)

    # Give existing cations correct nomenclature
    molutils.set_cations(molid, cation)

    # Calculate number of salt ions needed
    (pos_ions_needed, neg_ions_needed, num_wat, total_cations,
     total_anions, cation_conc, anion_conc) = \
        molutils.get_num_salt_ions_needed(molid, salt_conc, cation=cation)
    n_converted = pos_ions_needed + neg_ions_needed

    print("Solvent will be %d waters, %d %s (%.3f M), %d Cl (%.3f M)"
          % (num_wat, total_cations, configured, cation_conc,
             total_anions, anion_conc))
    print("Converting %d waters to %d %s ions and %d Cl ions..."
          % (n_converted, pos_ions_needed, cation, neg_ions_needed))

    # Cations first, then chlorides
    for ion in [cation] * pos_ions_needed + ['Cl'] * neg_ions_needed:
        add_salt_ion(ion, molid)

    return n_converted
def set_cations(molid, element, filter_sel='none'):
    """
    Sets all potassium and sodium atoms in a molecule to the requested
    cation, except those matched by filter_sel.

    Args:
        molid (int): VMD molecule ID to consider
        element (str in Na, K): Cation to convert to
        filter_sel (str): VMD atom selection string for atoms to leave
            alone. The default, 'none', excludes nothing.

    Raises:
        DabbleError: if an invalid cation is specified
    """
    if element not in ['Na', 'K']:
        raise DabbleError("Invalid cation '%s'. "
                          "Supported cations are Na, K" % element)

    # Select within the requested molecule rather than whichever molecule
    # happens to be on top. The indices are copied into a tuple up front
    # because set_ion is called on them while looping.
    existing = atomsel('element K Na and not (%s)' % filter_sel,
                       molid=molid)
    for gid in tuple(existing):
        set_ion(molid, gid, element)
def get_net_charge(sel, molid):
    """
    Sums the charge field over an atom selection and rounds it to an
    integer.

    Args:
        sel (str): VMD atom selection to compute the charge of
        molid (int): VMD molecule id to select within

    Returns:
        (int): The rounded net charge of the selection, 0 if the
            selection is empty

    Raises:
        DabbleError: if the total is more than 0.05 away from an integer
    """
    charges = np.array(atomsel(sel, molid=molid).get('charge'))
    n_atoms = charges.size
    if n_atoms == 0:
        return 0

    print("Calculating charge on %d atoms" % n_atoms)

    # Warn when the input seems to carry no charge information at all
    if all(charges == 0):
        print("\nWARNING: All charges in selection are zero. "
              "Check the input file has formal charges defined!\n"
              "Selection was:\n%s\n" % sel)
        print(set(charges))

    # Round to the nearest integer and check this is okay
    net_charge = sum(charges)
    rounded = round(net_charge)
    deviation = abs(rounded - net_charge)
    if deviation > 0.05:
        raise DabbleError("Total charge of %f is not integral within a "
                          "tolerance of 0.05. Check your input file."
                          % net_charge)
    return int(rounded)
def get_lipid_head(self, selection):
    """
    Finds the lipid head group contained in a selection and builds the
    name mapping for it.

    Every known head group is tried as a subgraph of the selection. If
    several match, the one whose residue definition is largest is used.

    Args:
        selection (VMD atomsel): Selection to set names for

    Returns:
        (dict int->str): Atom index to residue name
        (dict int->str): Atom index to atom name
        (int): Index of the atom outside the head that is bonded to the
            head atom joined to "-" in the template
        All three are None if no head group matched.

    Raises:
        DabbleError: if the tail attachment cannot be identified uniquely
    """
    resname = selection.get('resname')[0]
    lipid_graph = self.parse_vmd_graph(selection)[0]

    # Try each defined head group. Atoms of neighboring residues are
    # stripped from the template so that a subgraph match can succeed.
    matches = {}
    for candidate in self.lipid_heads:
        if not self.known_res.get(candidate):
            continue
        template = self.known_res.get(candidate)
        own_only = nx.Graph(template)
        own_only.remove_nodes_from(
            [n for n in template.nodes()
             if template.node[n]["residue"] != "self"])
        matcher = isomorphism.GraphMatcher(
            lipid_graph, own_only, node_match=self._check_atom_match)
        if matcher.subgraph_is_isomorphic():
            matches[candidate] = next(matcher.match())

    if not matches:
        return (None, None, None)

    # Keep the match with the largest residue definition
    matchname = max(matches, key=lambda name: len(self.known_res[name]))
    match = matches[matchname]
    graph = self.known_res.get(matchname)

    # Generate naming dictionaries to return
    own = [i for i in match
           if graph.node[match[i]].get("residue") == "self"]
    nammatch = {i: graph.node[match[i]].get("atomname") for i in own}
    resmatch = {i: graph.node[match[i]].get("resname") for i in own}

    # On the full template, find the atom joined to "-". Its counterpart
    # in the selection is where the minus-direction tail attaches, which
    # decides the order in which the tails are listed.
    minus_neighbors = [edge[1] for edge in graph.edges(nbunch=["-"])]
    minusbnded = [i for i in match if match[i] in minus_neighbors]
    if len(minusbnded) != 1:
        raise DabbleError(
            "Could not identify tail attached to lipid %s:%s!"
            % (resname, selection.get('resid')[0]))

    # The tail atom is the bonded neighbor that is not part of the head
    bonded = atomsel("index %s" % minusbnded[0]).bonds[0]
    minusidx = [i for i in bonded if i not in match]
    if len(minusidx) != 1:
        raise DabbleError(
            "Could not identify tail attached to lipid %s:%s!"
            % (resname, selection.get('resid')[0]))

    return (resmatch, nammatch, minusidx[0])
def get_linkage(self, selection, molid):
    """
    Checks if the selection corresponds to a residue that is covalently
    bonded to some other residue other than the normal + or - peptide
    bonds. Matches atom names by comparing the residue, with all atoms
    from other residues removed, against each known residue with its
    neighbor atoms removed.

    Args:
        selection (VMD atomsel): Selection to check
        molid (int): VMD molecule ID to look for other bonded residue in

    Returns:
        resnames (dict int -> str): Residue name translation dictionary
        atomnames (dict int -> str): Atom name translation dictionary
        conect (int): Index of the single externally bonded atom that is
            on another chain or not in an adjacent residue
        All three are None if nothing matched or if there is not exactly
        one such partner atom.

    Raises:
        ValueError: if the selection spans more than one resid
        DabbleError: if several residues match equally well and exactly
            one canonical choice cannot be made
    """
    # Sanity check selection corresponds to one resid
    resids = set(selection.get("resid"))
    if len(resids) > 1:
        raise ValueError("Multiple resids in selection: %s" % resids)

    # Get externally bonded atoms
    externs = self.get_extraresidue_atoms(selection)

    # Create a subgraph with no externally bonded atoms for matching
    # Otherwise, extra bonded atom will prevent matches from happening
    noext, _ = self.parse_vmd_graph(selection)
    noext.remove_nodes_from([
        i for i in noext.nodes() if noext.node[i].get("residue") != "self"
    ])

    # Find all possible subgraph matches, only amino acids for now, otherwise
    # weird terminal versions like NLYS instead of LYS could be chosen
    matches = {}
    for names in self.known_res:
        # Work on a copy so the stored residue definition keeps its
        # neighbor atoms
        graph = self.known_res.get(names).copy()
        graph.remove_nodes_from([
            i for i in graph.nodes()
            if graph.node[i].get("residue") != "self"
        ])

        matcher = isomorphism.GraphMatcher(noext, graph, \
                    node_match=super(AmberMatcher, self)._check_atom_match)
        if matcher.is_isomorphic():
            # The value stored is whatever matcher.match() returns; it is
            # only advanced later, with next(), for the chosen residue
            matches[names] = matcher.match()

    if not matches:
        # Dump the graph that failed to match, for debugging
        write_dot(noext, "noext.dot")
        return (None, None, None)

    # Want minimumally different thing, ie fewest _join atoms different
    def difference(res):
        # Size of the full residue definition minus the stripped residue
        return len(self.known_res[res]) - len(noext)

    minscore = min(difference(_) for _ in matches)
    possible_matches = [_ for _ in matches if difference(_) == minscore]

    # Prefer canonical amino acids here over weird other types
    if len(possible_matches) > 1:
        canonicals = [_ for _ in possible_matches if _ in self._acids]
        if len(canonicals) == 1:
            print("\tPreferring canonical acid %s" % canonicals[0])
            matchname = canonicals.pop()
        else:
            raise DabbleError("Ambiguous bonded residue %s" %
                              selection.get("resname")[0])
    else:
        matchname = possible_matches.pop()

    # Invert mapping so it's idx-> name. It's backwards b/c of subgraph
    mapping = next(matches[matchname])
    # Look names up on the full definition, not the stripped copy
    graph = self.known_res.get(matchname)

    # Generate naming dictionaries to return
    nammatch = dict((i, graph.node[mapping[i]].get("atomname")) \
                    for i in mapping.keys() if \
                    graph.node[mapping[i]].get("residue") == "self")
    resmatch = dict((i, graph.node[mapping[i]].get("resname")) \
                    for i in mapping.keys() if \
                    graph.node[mapping[i]].get("residue") == "self")

    # Find resid and fragment for other molecule
    partners = []
    resid = selection.get("residue")[0]
    chain = selection.get("chain")[0]
    for num in externs:
        rid = atomsel("index %d" % num, molid=molid).get("residue")[0]
        ch = atomsel("index %d" % num, molid=molid).get("chain")[0]

        # Atoms on another chain, or in a residue that is not directly
        # before or after this one, count as linkage partners
        if ch != chain:
            partners.append(num)
        elif rid != resid + 1 and rid != resid - 1:
            partners.append(num)

    # Only a single extra linkage is handled
    if len(partners) != 1:
        return (None, None, None)
    return (resmatch, nammatch, partners[0])
def _write_protein_blocks(self, molid, frag):
    """
    Writes a protein fragment to a pdb file for input to psfgen.
    Automatically assigns amino acid names.

    Each residue is matched as a plain residue, then as a disulfide
    participant (only when it has 3 or more bonds leaving the residue),
    and finally as a patched residue. The fragment is written to a pdb
    in self.tmp_dir and the matching psfgen segment commands are written
    to self.file.

    Args:
        molid (int): VMD molecule ID of renumbered protein
        frag (str): Fragment to write

    Returns:
        (set of str): Patches to add to the psfgen input file after all
            proteins have been loaded

    Raises:
        DabbleError: if a residue matches nothing
    """
    print("Setting protein atom names")

    # Put our molecule on top to simplify atom selection language
    old_top = molecule.get_top()
    molecule.set_top(molid)

    # Restore the previous top molecule even if matching fails part way
    try:
        patches = set()
        extpatches = set()
        seg = "P%s" % frag
        frag_sel = "fragment '%s'" % frag

        residues = list(set(atomsel(frag_sel).get('residue')))
        for residue in residues:
            sel = atomsel('residue %s' % residue)
            resid = sel.get('resid')[0]

            # Only try to match single amino acid if there are 1 or 2 bonds
            if len(self.matcher.get_extraresidue_atoms(sel)) < 3:
                (newname, atomnames) = self.matcher.get_names(
                    sel, print_warning=False)
            # See if it's a disulfide bond participant
            else:
                (newname, patchline, atomnames) = \
                    self.matcher.get_disulfide("residue %d" % residue,
                                               frag, molid)
                if newname:
                    extpatches.add(patchline)

            # Couldn't find a match. See if it's a patched residue
            if not newname:
                (newname, patch, atomnames) = self.matcher.get_patches(sel)
                if newname:
                    patches.add("patch %s %s:%d\n" % (patch, seg, resid))

            # Fall through to error condition
            if not newname:
                raise DabbleError("Couldn't find a patch for %s:%s"
                                  % (sel.get('resname')[0], resid))

            # Do the renaming. Names containing + or - refer to atoms of
            # neighboring residues and are left untouched.
            for idx, name in atomnames.items():
                atom = atomsel('index %s' % idx)
                if atom.get('name')[0] != name and "+" not in name and \
                   "-" not in name:
                    atom.set('name', name)
            sel.set('resname', newname)

        # Save protein chain in the correct order
        filename = self.tmp_dir + '/psf_protein_%s.pdb' % seg
        _write_ordered_pdb(filename, frag_sel, molid)
        print("\tWrote %d atoms to the protein segment %s"
              % (len(atomsel(frag_sel)), seg))

        # Now write to psfgen input file. Each command goes on its own
        # line, including the ones inside the segment braces.
        string = (
            "\n"
            "set protnam %s\n"
            "segment %s {\n"
            "  first none\n"
            "  last none\n"
            "  pdb $protnam\n"
            "}\n"
        ) % (filename, seg)
        self.file.write(string)

        print("Applying the following single-residue patches to P%s:\n"
              % frag)
        print("\t%s" % "\t".join(patches))
        self.file.write(''.join(patches))
        self.file.write("\n")

        self.file.write("coordpdb $protnam %s\n" % seg)
    finally:
        if old_top != -1:
            molecule.set_top(old_top)

    return extpatches
def _orient_solute(self, molid):
    """
    Orients the solute. Can either move it explicitly in the z direction,
    or align to an OPM structure. With neither, the system is centered.

    All settings are read from self.opts:
        z_move (float): Amount to move in the Z direction
        z_rotation (float): Amount to rotate membrane relative to
            protein, can just take this straight from the OPM website value
        opm_pdb (str): Filename of OPM structure to align to
        opm_align (str): Atom selection string to align
        tmp_dir (str): Directory to put temporary files in

    Args:
        molid (int): VMD molecule ID to orient

    Returns:
        (int) VMD molecule ID of oriented system. This is a newly loaded
            molecule only when the centering path is taken.

    Raises:
        DabbleError: if movement and alignment arguments are both specified
    """
    # Check that OPM and alignment aren't both specified
    # NOTE(review): the comparisons are against 0, so an option that is
    # absent from self.opts (None) also counts as specified here.
    # Presumably both options always have numeric defaults; confirm.
    if self.opts.get('opm_pdb') and \
       (self.opts.get('z_move') != 0 or self.opts.get('z_rotation') != 0):
        raise DabbleError("ERROR: Cannot specify an OPM pdb and "
                          "manual orientation information")

    # Alignment to an OPM structure: fit, move, and we are done
    if self.opts.get('opm_pdb'):
        opm = molecule.load('pdb', self.opts['opm_pdb'])
        moveby = atomsel('protein and backbone', molid=molid).fit( \
            atomsel(self.opts.get('opm_align'), molid=opm))
        atomsel('all', molid=molid).move(moveby)
        molecule.delete(opm)
        return molid

    # Manual translation along z. Done unless a rotation also follows.
    if self.opts.get('z_move'):
        atomsel('all', molid=molid).moveby((0, 0, self.opts['z_move']))
        if not self.opts.get('z_rotation'):
            return molid

    if self.opts.get('z_rotation'):
        trans.resetview(molid)  # View affect rotation matrix, now it's I
        # This is negative because we want membrane flat along the z-axis,
        # and OPM lists the membrane rotation relative to the protein
        theta = math.radians(-1 * self.opts['z_rotation'])
        # Rotation matrix in row order with 4th dimension just from I
        # pylint: disable=bad-whitespace, bad-continuation
        rotmat = [
            math.cos(theta), -1 * math.sin(theta), 0, 0,
            math.sin(theta), math.cos(theta), 0, 0,
            0, 0, 1, 0,
            0, 0, 0, 1
        ]
        # pylint: enable=bad-whitespace, bad-continuation
        trans.set_rotation(molid, rotmat)
        return molid

    # Center the system according to VMD's internal metric, then
    # move the protein in the xy plane so that there is equal padding
    # on either side
    molid = molutils.center_system(molid=molid,
                                   tmp_dir=self.opts.get('tmp_dir'),
                                   center_z=self.water_only)
    system = atomsel('all', molid=molid)
    # Shift that puts the midpoint of the x and y extents at the origin
    tx = (-max(system.get('x')) - min(system.get('x'))) / 2.
    ty = (-max(system.get('y')) - min(system.get('y'))) / 2.
    temp_mae = tempfile.mkstemp(suffix='.mae', prefix='dabble_centered',
                                dir=self.opts.get('tmp_dir'))[1]
    system.moveby((tx, ty, 0))
    # Write the shifted system out and reload it as a new molecule
    system.write('mae', temp_mae)
    molecule.delete(molid)
    new_id = molecule.load('mae', temp_mae)
    return new_id
def write(self, prmtop_name):
    """
    Creates a prmtop with either AMBER or CHARMM parameters.

    For a charmm force field, a CharmmWriter produces the system first
    and the result is then converted with _psf_to_charmm_amber. For the
    amber force field, atoms are renamed to match the topologies,
    temporary pdb files are written, and leap is run on them. Hydrogen
    mass repartitioning is applied afterwards if self.hmr is set, and the
    resulting prmtop is run through a validity check.

    Args:
        prmtop_name (str): Prefix of the output files. ".prmtop" and
            ".inpcrd" are appended to it.

    Raises:
        DabbleError: if self.forcefield contains neither "charmm" nor
            "amber"
    """
    self.prmtop_name = prmtop_name

    # Charmm forcefield
    if "charmm" in self.forcefield:
        psfgen = CharmmWriter(molid=self.molid,
                              tmp_dir=self.tmp_dir,
                              lipid_sel=self.lipid_sel,
                              extra_topos=self.extra_topos,
                              override_defaults=self.override)
        self.topologies = psfgen.write(self.prmtop_name)
        self._psf_to_charmm_amber()

    # Amber forcefield
    elif "amber" in self.forcefield:

        # Initialize the matcher
        self.matcher = AmberMatcher(self.topologies)
        print("Using the following topologies:")
        for top in self.topologies:
            print(" - %s" % top.split("/")[-1])
            # NOTE(review): this only rebinds the loop variable, the
            # entries of self.topologies are not changed
            top = os.path.abspath(top)

        for par in self.parameters:
            # NOTE(review): same as above, self.parameters is unchanged
            par = os.path.abspath(par)

        # Assign atom types
        print("Assigning AMBER atom types...")
        conect = self._rename_atoms_amber()

        # Create temporary pdb files that will be leap inputs
        pdbs = []
        pdbs.append(self._write_lipids())
        prot_pdbseqs = self._write_protein()
        pdbs.extend(self._write_solvent())
        ligfiles = self._write_ligands()

        # Now invoke leap to create the prmtop and inpcrd
        outfile = self._run_leap(ligfiles, prot_pdbseqs, pdbs, conect)

        # Repartion hydrogen masses if requested
        if self.hmr:
            print("\nRepartitioning hydrogen masses...")
            parm = AmberParm(prm_name=outfile + ".prmtop",
                             xyz=outfile + ".inpcrd")
            action = HMassRepartition(parm, "dowater")
            action.execute()
            # The repartitioned topology is written to the final name
            write = parmout(action.parm, "%s.prmtop" % self.prmtop_name)  #self.prmtop_name))
            write.execute()
            parm = write.parm

        # Check validity of output prmtop using parmed
        parm = AmberParm(prm_name=self.prmtop_name + ".prmtop",
                         xyz=self.prmtop_name + ".inpcrd")
        print("\nChecking for problems with the prmtop...")
        print(" Verify all warnings!")
        action = checkValidity(parm)
        action.execute()

    else:
        raise DabbleError("Unhandled forcefield: %s" % self.forcefield)
def get_cell_size(self,
                  mem_buf,
                  wat_buf,
                  molid=None,
                  filename=None,
                  zh_mem_full=_MEMBRANE_FULL_THICKNESS / 2.0,
                  zh_mem_hyd=_MEMBRANE_HYDROPHOBIC_THICKNESS / 2.0):
    """
    Gets the cell size of the final system given initial system and
    buffers. Behaves differently for water-only systems (self.water_only)
    and membrane systems. Sets self.size, self._zmax and self._zmin.

    User-requested dimensions in self.opts ('user_x', 'user_y', 'user_z')
    take precedence over the computed ones.

    Args:
        mem_buf (float) : Membrane (xy) buffer amount
        wat_buf (float) : Water (z) buffer amount
        molid (int) : VMD molecule ID to consider (can't use with filename).
            The top molecule is used if neither is given.
        filename (str) : Filename of system to consider (can't use w molid).
            Read as a mae file and deleted again before returning.
        zh_mem_full (float) : Half of the full membrane thickness
        zh_mem_hyd (float) : Half of the membrane hydrophobic region
            thickness

    Returns:
        dx_sol, dy_sol, dx_tm, dy_tm, dz_full (float tuple):
            x solute dimension, y solute dimension,
            TM x solute dimension, TM y solute dimension,
            solute z dimension

    Raises:
        ValueError: if filename and molid are both specified
        DabbleError: if a user-specified z size is too small
    """
    # Sanity check
    if filename is not None and molid is not None:
        raise ValueError("Specified molid and filename to get_cell_size")

    if filename is not None:
        # Remember the current top molecule so it can be restored below
        top = molecule.get_top()
        molid = molecule.read(-1, 'mae', filename)
    elif molid is None:
        molid = molecule.get_top()

    # Some options different for water-only systems (no lipid)
    if self.water_only:
        solute_z = atomsel(self.solute_sel, molid=molid).get('z')
        dx_tm = 0.0
        dy_tm = 0.0
        # Without a membrane the whole solute is in solvent
        sol_solute = atomsel(self.solute_sel, molid)
    else:
        solute_z = atomsel(self.solute_sel, molid=molid).get('z')
        # Part of the solute between the hydrophobic boundaries
        tm_solute = atomsel(
            '(%s) and z > %f and z < %f' %
            (self.solute_sel, -zh_mem_hyd, zh_mem_hyd), molid)
        if len(tm_solute):
            dx_tm = max(tm_solute.get('x')) - min(tm_solute.get('x'))
            dy_tm = max(tm_solute.get('y')) - min(tm_solute.get('y'))
        else:
            dx_tm = dy_tm = 0
        # Part of the solute outside the hydrophobic boundaries
        sol_solute = atomsel(
            '(%s) and (z < %f or z > %f)' %
            (self.solute_sel, -zh_mem_hyd, zh_mem_hyd), molid)

    # Solvent invariant options
    dx_sol = max(sol_solute.get('x')) - min(sol_solute.get('x'))
    dy_sol = max(sol_solute.get('y')) - min(sol_solute.get('y'))

    # In x and y, the larger of the padded membrane-region extent and the
    # padded solvent-region extent is used
    if self.opts.get('user_x'):
        self.size[0] = self.opts['user_x']
    else:
        self.size[0] = max(dx_tm + 2. * mem_buf, dx_sol + 2. * wat_buf)

    if self.opts.get('user_y'):
        self.size[1] = self.opts['user_y']
    else:
        self.size[1] = max(dy_tm + 2. * mem_buf, dy_sol + 2. * wat_buf)

    # Z dimension. If there's a membrane, need to account for asymmetry
    # in the Z dimension where the protein could be uneven in the membrane
    # or even peripheral
    if self.opts.get('user_z'):
        self.size[2] = self.opts['user_z']
        # Split the space not taken by the solute equally above and below
        buf = (self.opts['user_z'] - max(solute_z) + min(solute_z)) / 2
        self._zmax = max(solute_z) + buf
        self._zmin = min(solute_z) - buf
        if zh_mem_full > self._zmax or -zh_mem_full < self._zmin:
            raise DabbleError("Specified user z of %f is too small to "
                              "accomodate protein and membrane!" %
                              self.opts['user_z'])
    else:
        if self.water_only:
            self._zmax = max(solute_z) + wat_buf
            self._zmin = min(solute_z) - wat_buf
        else:
            # The box must contain the full membrane as well
            self._zmax = max(max(solute_z) + wat_buf, zh_mem_full)
            self._zmin = min(min(solute_z) - wat_buf, -zh_mem_full)
        self.size[2] = self._zmax - self._zmin

    # Cleanup temporary file, if read in
    if filename is not None:
        molecule.delete(molid)
        if top != -1:
            molecule.set_top(top)

    return dx_sol, dy_sol, dx_tm, dy_tm, max(solute_z) - min(solute_z)
def _write_lipids(self):
    """
    Splits lipids into modular tail, head, tail that Lipid14 specifies.
    Does name matching for lipids. Writes the pdb file with TER cards in
    between each lipid.

    Residues in the lipid selection that have no lipid head group (for
    example cholesterol) are matched as normal residues instead. They are
    renamed and renumbered here but not written to this file.

    Returns:
        (str): File name of PDB file written, or None if there are no
            lipids

    Raises:
        DabbleError: if a residue is neither a lipid nor a known residue,
            or if the tails of a lipid cannot be identified
    """
    # Select within our own molecule, since it is not on top yet
    lipid_res = set(
        atomsel(self.lipid_sel, molid=self.molid).get('residue'))
    n_lips = len(lipid_res)
    if not n_lips:
        return None

    molecule.set_top(self.molid)
    temp = tempfile.mkstemp(suffix='.pdb', prefix='amber_lipids_',
                            dir=self.tmp_dir)[1]

    # The context manager closes the file even if matching fails
    with open(temp, 'w') as fileh:
        # Check if it's a normal residue first in case cholesterol etc in
        # the selection
        resid = 1
        idx = 1
        while lipid_res:
            residue = lipid_res.pop()
            sys.stdout.write(
                "Writing lipids.... %.0f%% \r"
                % (100. - 100. * len(lipid_res) / float(n_lips)))
            sys.stdout.flush()

            sel = atomsel('residue %s' % residue)
            headres, headnam, minusidx = self.matcher.get_lipid_head(sel)

            # If it's not a lipid head, check if it's a normal residue
            if not headres:
                resnames, atomnames = self.matcher.get_names(
                    sel, print_warning=False)
                if not resnames:
                    raise DabbleError(
                        "Residue %s:%s not a valid lipid"
                        % (sel.get('resname')[0], sel.get('resid')[0]))
                self._apply_naming_dictionary(resnames, atomnames)
                sel.set('resid', resid)
                resid += 1
                continue

            # Apply the name to the heads
            self._apply_naming_dictionary(headres, headnam)

            # Pull out the tail resnames and indices
            taildicts = self.matcher.get_lipid_tails(sel, headnam.keys())
            for (resnames, atomnames) in taildicts:
                self._apply_naming_dictionary(resnames, atomnames)

            # Renumber the first tail, head, then second tail and write
            # them separately. Needs to be done this way to guarantee
            # order. An atom index that's in the minus tail is given by
            # get_lipid_head.

            # First tail
            firstdict = [_ for _ in taildicts if minusidx in _[0].keys()]
            if len(firstdict) != 1:
                raise DabbleError(
                    "Error finding tails for lipid %s:%s"
                    % (sel.get('resname')[0], sel.get('resid')[0]))
            firstdict = firstdict[0]
            lsel = atomsel('index %s' % ' '.join(
                [str(x) for x in firstdict[0].keys()]))
            lsel.set('resid', resid)
            lsel.set('user', 0.0)
            idx = self._write_residue(lsel, fileh, idx)
            taildicts.remove(firstdict)

            # Head
            lsel = atomsel('index %s' % ' '.join(
                [str(x) for x in headnam.keys()]))
            lsel.set('resid', resid + 1)
            lsel.set('user', 0.0)
            idx = self._write_residue(lsel, fileh, idx)

            # Second tail
            lsel = atomsel('index %s' % ' '.join(
                [str(x) for x in taildicts[0][0].keys()]))
            lsel.set('resid', resid + 2)
            lsel.set('user', 0.0)
            idx = self._write_residue(lsel, fileh, idx)
            resid += 3

            fileh.write("TER\n")  # TER card between lipid residues

        fileh.write("END\n")

    sys.stdout.write("\n")
    return temp
def __init__(self, molid, **kwargs):
    """
    Creates a writer and selects the default topology and parameter files
    for the requested forcefield.

    Args:
        molid (int): VMD molecule ID of system to write
        tmp_dir (str): Directory for temporary files. Defaults to "."
        forcefield (str): charmm36m, charmm36, charmm, or amber. "charmm"
            uses the same parameters as "charmm36". Defaults to
            "charmm36m".
        lipid_sel (str): Lipid selection string. Defaults to "lipid"
        hmr (bool): If hydrogen masses should be repartitioned. Defaults
            to False.
        extra_topos (list of str): Additional topology (.str, .off, .lib)
            to include.
        extra_params (list of str): Additional parameter sets
            (.str, .frcmod)
        override_defaults (bool): If set, the default topologies and
            parameters are dropped and only the extra ones are used.
        debug_verbose (bool): Prints additional output, like from tleap.

    Raises:
        DabbleError: if the forcefield is unsupported, or if "amber" is
            requested and AMBERHOME is unset, tleap is missing from
            $AMBERHOME/bin, or a required leaprc file is absent
    """
    self.molid = molid
    self.prmtop_name = ""
    self.tmp_dir = kwargs.get("tmp_dir", ".")
    self.lipid_sel = kwargs.get("lipid_sel", "lipid")
    self.hmr = kwargs.get("hmr", False)
    self.extra_topos = kwargs.get("extra_topos", None)
    self.override = kwargs.get("override_defaults", False)
    self.debug_verbose = kwargs.get("debug_verbose", False)

    forcefield = kwargs.get("forcefield", "charmm36m")
    if forcefield not in ["amber", "charmm36m", "charmm", "charmm36"]:
        raise DabbleError("Unsupported forcefield: %s" % forcefield)
    self.forcefield = forcefield

    # Defined for every forcefield so the attribute always exists
    self.matcher = None

    if self.forcefield == "amber":
        amberhome = os.environ.get("AMBERHOME")
        if not amberhome:
            raise DabbleError(
                "AMBERHOME must be set to use AMBER forcefield!")
        if not os.path.isfile(os.path.join(amberhome, "bin", "tleap")):
            raise DabbleError("tleap is not present in $AMBERHOME/bin!")

        # These leaprc names must all exist under dat/leap/cmd
        self.topologies = []
        for top in ("leaprc.protein.ff14SB", "leaprc.lipid14",
                    "leaprc.water.tip3p", "leaprc.gaff"):
            path = os.path.join(amberhome, "dat", "leap", "cmd", top)
            if not os.path.isfile(path):
                raise DabbleError("AMBER version too old! "
                                  "Dabble requires >= AmberTools16!")
            self.topologies.append(path)
        self.parameters = []
    else:
        # The CHARMM variants differ only in the protein parameter file
        if self.forcefield == "charmm36m":
            protein = "par_all36m_prot.prm"
        else:
            protein = "par_all36_prot.prm"
        self.parameters = [
            resource_filename(__name__, "charmm_parameters/%s" % name)
            for name in ("toppar_water_ions.str",
                         "par_all36_cgenff.prm",
                         protein,
                         "par_all36_lipid.prm",
                         "par_all36_carb.prm",
                         "par_all36_na.prm",
                         "toppar_all36_prot_na_combined.str")
        ]
        self.topologies = []

    if self.override:
        self.topologies = []
        self.parameters = []

    if kwargs.get("extra_topos") is not None:
        self.topologies.extend(kwargs.get("extra_topos"))

    if kwargs.get("extra_params") is not None:
        self.parameters.extend(kwargs.get("extra_params"))

    self.prompt_params = False
def _run_leap(self, ligfiles, prot_pdbseqs, pdbs, conect):
    """
    Runs leap, creating a prmtop and inpcrd from the given pdb and off
    library files.

    Args:
        ligfiles (iterable of indexable pairs): For each ligand, element 0
            is the pdb or mol2 file name and element 1 is the UNIT
            sequence passed to loadpdbusingseq (only read for pdb files).
        prot_pdbseqs (iterable of (str, str)): PDB file containing a
            protein fragment, sequence of UNITs for that fragment
        pdbs (list of str): PDB or Mol2 files to combine. None entries
            are ignored.
        conect (set of int): Atom indices connected by an extraresidue
            bond. NOTE: this set is consumed (emptied) while the bond
            lines are written.

    Returns:
        (str) Prefix of file written

    Raises:
        DabbleError: if AMBERHOME is unset, a topology, coordinate or
            ligand file type cannot be determined, tleap fails, or the
            protein atom count of the result differs from the input
        ValueError: if an atom in conect does not have exactly one
            bonded partner that is also in conect
    """
    from subprocess import CalledProcessError  # function-scope import

    # Ensure leap is actually available
    if not os.environ.get("AMBERHOME"):
        raise DabbleError("AMBERHOME must be set to use leap!")

    # Create the leap input file. Reuse the mkstemp descriptor instead of
    # leaking it and reopening the file by name.
    handle, leapin = tempfile.mkstemp(suffix='.in', prefix='dabble_leap_',
                                      dir=self.tmp_dir)
    with os.fdopen(handle, 'w') as fileh:
        libraries = self.topologies + self.parameters
        for i in libraries:
            if "leaprc" in i:
                fileh.write("source %s\n" % i)
            elif "frcmod" in i:
                fileh.write("loadamberparams %s\n" % i)
            elif ".lib" in i:
                fileh.write("loadoff %s\n" % i)
            elif ".off" in i:
                continue
            else:
                raise DabbleError("Unknown topology type: %s" % i)
        fileh.write('\n')

        # Add off files here, after everything else has been loaded
        for i in [_ for _ in libraries if ".off" in _]:
            fileh.write("loadoff %s\n" % i)

        pdbs = [_ for _ in pdbs if _ is not None]
        for i, pdb in enumerate(pdbs):
            if "pdb" in pdb:
                fileh.write("p%s = loadpdb %s\n" % (i, pdb))
            elif "mol2" in pdb:
                fileh.write("p%s = loadmol2 %s\n" % (i, pdb))
            else:
                raise DabbleError("Unknown coordinate type: %s" % pdb)

        for i, f in enumerate(ligfiles):
            if "pdb" in f[0]:
                fileh.write("l%s = loadpdbusingseq %s {%s}\n"
                            % (i, f[0], f[1]))
            elif "mol2" in f[0]:
                fileh.write("l%s = loadmol2 %s\n" % (i, f[0]))
            else:
                raise DabbleError("Unknown ligand file type: %s" % f[0])

        for i, pp in enumerate(prot_pdbseqs):
            fileh.write("pp%d = loadpdbusingseq %s { %s} \n"
                        % (i, pp[0], pp[1]))

        # Need to combine before creating bond lines since can't create
        # bonds between UNITs
        fileh.write(
            "p = combine { %s }\n" %
            ' '.join(["pp%d" % i for i in range(len(prot_pdbseqs))]))

        # Create bond lines
        while conect:
            # Pull out two atoms bound to each other
            idx = conect.pop()
            s1 = atomsel("index %d" % idx)
            other = [s for s in s1.bonds[0] if s in conect]
            if len(other) != 1:
                raise ValueError("Problem with bonds to index %d" % idx)
            other = other[0]
            s2 = atomsel("index %d" % other)
            conect.remove(other)

            fileh.write("bond p.{0}.{1} p.{2}.{3}\n".format(
                s1.get('resid')[0], s1.get('name')[0],
                s2.get('resid')[0], s2.get('name')[0]))

        if len(pdbs):
            fileh.write("\np = combine { p %s }\n" %
                        ' '.join(["p%d" % i for i in range(len(pdbs))]))
        if len(ligfiles):
            fileh.write("p = combine { p %s }\n" %
                        ' '.join(["l%d" % i for i in range(len(ligfiles))]))
        fileh.write("setbox p centers 0.0\n")
        fileh.write("saveamberparm p %s.prmtop %s.inpcrd\n"
                    % (self.prmtop_name, self.prmtop_name))
        fileh.write("quit\n")

    # Now invoke leap. Keep whatever it printed even when it exits with an
    # error, so that the output can be shown to the user.
    tleap = os.path.join(os.environ.get("AMBERHOME"), "bin", "tleap")
    failed = False
    try:
        raw = check_output([tleap, "-f", leapin])
    except CalledProcessError as err:
        failed = True
        raw = err.output or b""
    except OSError as err:
        failed = True
        raw = b""
        print("Could not execute %s: %s" % (tleap, err))

    out = "%s%s%s" % (
        "\n================BEGIN TLEAP OUTPUT================\n",
        raw.decode("utf-8", "replace"),
        "\n=================END TLEAP OUTPUT=================\n")

    # tleap can exit cleanly without having saved anything
    if "not saved" in out:
        failed = True

    if self.debug_verbose or failed:
        print(out)
    if failed:
        raise DabbleError(
            "Call to tleap failed! See above output for errors")

    # Do a quick sanity check that all the protein is present.
    mademol = molecule.load("parm7", "%s.prmtop" % self.prmtop_name,
                            "rst7", "%s.inpcrd" % self.prmtop_name)
    acids = "resname %s" % " ".join(self.matcher._acids)
    if len(atomsel(acids, mademol)) != len(atomsel(acids, self.molid)):
        print(out)
        raise DabbleError(
            "Not all protein was present in the output prmtop."
            " This indicates a problem with tleap. Check the "
            "above output, especially for covalent ligands. "
            "Is naming consistent in all .off files?")

    return self.prmtop_name
def _find_single_residue_names(self, resname, molid):
    """
    Uses the graph matcher and available topologies to match up ligand
    names automatically. Each residue with the given name is matched
    separately, first as a plain residue and then, if that fails, as a
    patched residue. Matched atoms and residues are renamed in place.

    Only atoms with user field 1.0 are considered. The top molecule is
    temporarily set to molid and restored before returning or raising.

    Args:
        resname (str): Residue name of the ligand that will be written.
            All ligands will be checked separately against the graphs.
        molid (int): VMD molecule ID to consider

    Returns:
        (list of ints): Residue numbers (not resid) of all input ligands
            that were successfully matched, across all chains. Need to do
            it this way since residue names can be changed in here to
            different things. Empty if nothing has this residue name.

    Raises:
        DabbleError: if, within a chain, the number of resids does not
            match the number of residues as interpreted by VMD
        NotImplementedError: if a residue could not be matched to a graph
    """
    # Put our molecule on top
    old_top = molecule.get_top()
    molecule.set_top(molid)

    try:
        # Sanity check that there is no discrepancy between defined resids
        # and residues as interpreted by VMD, collecting the residues of
        # every chain as we go.
        matched = []
        chains = set(atomsel("user 1.0 and resname '%s'"
                             % resname).get('chain'))
        for chain in chains:
            chainsel = atomsel("user 1.0 and resname '%s' and chain %s"
                               % (resname, chain))
            residues = list(set(chainsel.get('residue')))
            resids = list(set(chainsel.get('resid')))
            if len(residues) != len(resids):
                raise DabbleError("VMD found %d residues for resname '%s', "
                                  "but there are %d resids! Check input."
                                  % (len(residues), resname, len(resids)))
            matched.extend(residues)

        for residue in matched:
            sel = atomsel("residue %s and resname '%s' and user 1.0"
                          % (residue, resname))

            (newname, atomnames) = self.matcher.get_names(
                sel, print_warning=True)
            if not newname:
                # Fall back to a patched residue. The matched name must go
                # into newname: it is what gets tested and applied below.
                (newname, patch, atomnames) = self.matcher.get_patches(sel)
                if not newname:
                    print("ERROR: Could not find a residue definition "
                          "for %s:%s" % (resname, residue))
                    raise NotImplementedError(
                        "No residue definition for %s:%s"
                        % (resname, residue))
                print("\tApplying patch %s to ligand %s" % (patch, newname))

            # Do the renaming. Names containing + or - are skipped.
            for idx, name in atomnames.items():
                atom = atomsel('index %s' % idx)
                if atom.get('name')[0] != name and "+" not in name and \
                   "-" not in name:
                    print("Renaming %s:%s: %s -> %s"
                          % (resname, residue, atom.get('name')[0], name))
                    atom.set('name', name)
            sel.set('resname', newname)
    finally:
        molecule.set_top(old_top)

    return matched
def _parse_topology(self, filename):
    """
    Parses an amber topology file. More specifically, parses a leaprc
    file. The atom type definitions are in there as "addAtomTypes"
    command, and the topologies in the files specified with "loadOff"
    command. Files named like .off/.lib or frcmod are handed directly to
    the corresponding loader instead.

    Args:
        filename (str): The file to parse

    Returns:
        True if a leaprc was parsed, otherwise whatever the .off or
        frcmod loader returned

    Raises:
        DabbleError if topology file is malformed in various ways
        DabbleError if AMBERHOME is unset when parsing a leaprc
    """
    # Library and parameter files are not leaprc files: load them and
    # stop, rather than falling through to the leaprc parser below.
    if ".off" in filename or ".lib" in filename:
        return self._load_off(filename)
    if "frcmod" in filename:
        return self._load_params(filename)
    if "leaprc" not in filename:
        raise DabbleError(
            "AmberMatcher only parses leaprc or frcmod topologies!"
            "Can't read topology '%s'" % filename)

    # Set AMBER search path for lib files
    if not os.environ.get("AMBERHOME"):
        raise DabbleError("AMBERHOME is unset!")
    leapdir = os.path.join(os.environ["AMBERHOME"], "dat", "leap")

    # Non-empty only while inside an addAtomTypes { ... } block.
    # NOTE: a block that is never closed is not reported as an error.
    incmd = ""
    with open(filename, 'r') as fileh:
        for line in fileh:
            # Drop comments
            if "#" in line:
                line = line[:line.index("#")]
            if not len(line):
                continue
            tokens = [i.strip(" \t'\"\n") for i in line.split()]
            if not len(tokens):
                continue

            if incmd == "addatomtypes":
                # Line should look like: { "OG" "O" "sp3" }
                # we need the first 2 things for atom name and element
                if tokens[0] == "}":  # done with atom type definition
                    incmd = ""
                    continue

                if tokens[0] != "{" or tokens[-1] != "}" \
                   or len(tokens) < 4:
                    raise DabbleError("Malformed line in %s: %s"
                                      % (filename, line))

                if not tokens[2]:
                    logger.warning("Ignoring pseudoatom %s", tokens[1])
                    continue

                if tokens[2] not in self.MASS_LOOKUP.values() and \
                   tokens[2] not in self.LEAP_ELEMENTS.values():
                    raise DabbleError("Unknown element in %s\n: %s"
                                      % (filename, tokens[2]))

                self.nodenames[tokens[1]] = tokens[2]
                continue

            command = tokens[0].lower()

            # addAtomTypes adds more atoms
            if command == "addatomtypes":
                incmd = "addatomtypes"
                continue

            if command not in ("loadoff", "loadamberparams", "source"):
                continue

            # All remaining commands take a file name argument
            if len(tokens) < 2:
                raise DabbleError("Malformed line in %s: %s"
                                  % (filename, line))
            target = tokens[1]

            # Each command searches the current directory first, then the
            # matching subdirectory of $AMBERHOME/dat/leap
            if command == "loadoff":
                # loadOff loads a topology library
                if os.path.isfile(target):
                    self._load_off(target)
                else:
                    self._load_off(os.path.join(leapdir, "lib", target))
            elif command == "loadamberparams":
                # loadamberparams loads a frcmod file, which may define ions
                if os.path.isfile(target):
                    self._load_params(target)
                else:
                    self._load_params(
                        os.path.join(leapdir, "parm", target))
            else:
                # can source other leaprc files within this one
                if os.path.isfile(target):
                    self._parse_topology(target)
                else:
                    self._parse_topology(
                        os.path.join(leapdir, "cmd", target))

    return True
def get_disulfide(self, selstring, fragment, molid): #pylint: disable=too-many-locals
    """
    Decides whether the selected residue is a cysteine taking part in a
    disulfide bond. If so, builds the psfgen DISU patch line for the pair
    and an atom name translation from a subgraph match against the known
    cysteine residue.

    Args:
        selstring (str): Selection to check
        fragment (str): Fragment ID. Not referenced in this method.
        molid (int): VMD molecule of entire system (needed for disu partner)

    Returns:
        (str, str, dict) resname matched, patch line to put directly
            into psfgen, atom index to name translation dictionary.
            (None, None, None) if the residue is not a disulfide cysteine.

    Raises:
        DabbleError: if the residue matches cysteine and has three
            extraresidue atoms but none of them is a sulfur
    """
    not_disulfide = (None, None, None)
    selection = atomsel(selstring, molid=molid)

    # A disulfide cysteine has exactly 3 atoms bonded outside the residue
    rgraph, _ = self.parse_vmd_graph(selection)
    externs = self.get_extraresidue_atoms(selection)
    if len(externs) != 3:
        return not_disulfide

    # Keep only this residue's own atoms for matching
    foreign = [n for n in rgraph.nodes()
               if rgraph.node[n]["residue"] != "self"]
    truncated = nx.Graph(rgraph)
    truncated.remove_nodes_from(foreign)

    # Collect every known amino acid that contains this residue as a
    # subgraph, remembering the match generator for each
    atom_match = super(CharmmMatcher, self)._check_atom_match
    candidates = {}
    for acid in self._acids:
        known = self.known_res.get(acid)
        if not known:
            continue
        matcher = isomorphism.GraphMatcher(known, truncated,
                                           node_match=atom_match)
        if matcher.subgraph_is_isomorphic():
            candidates[acid] = matcher.match()

    if not candidates:
        return not_disulfide

    # The candidate with the largest known graph must be cysteine
    matchname = max(candidates.keys(),
                    key=(lambda x: len(self.known_res[x])))
    if matchname != "CYS":
        return not_disulfide

    # The mapping runs name->idx because the known residue was the
    # outer graph of the subgraph search; flip it to idx->name.
    mapping = next(candidates[matchname])
    atomnames = dict((index, name) for (name, index) in mapping.items())

    # Find the partner cysteine through the bonded sulfur
    partners = []
    for n in externs:
        if atomsel("index %d" % n, molid=molid).get("element")[0] == "S":
            partners.append(n)
    if not partners:
        raise DabbleError("3 bonded Cys %d isn't a valid disulfide!"
                          % selection.get('resid')[0])
    osel = atomsel("index %d" % partners[0], molid=molid)

    # Order by (fragment, resid) so same DISU isn't listed twice. On a
    # complete tie this residue goes first.
    other_key = (osel.get("fragment")[0], osel.get("resid")[0])
    self_key = (selection.get("fragment")[0], selection.get("resid")[0])
    if other_key < self_key:
        first, second = other_key, self_key
    else:
        first, second = self_key, other_key

    patchline = "patch DISU P%d:%d P%d:%d\n" % (first + second)

    return (matchname, patchline, atomnames)
def _load_off(self, filename):
    """
    Parses an off format amber library file. Puts the resulting residue
    definitions into the known_res dictionary, one graph per unit. Atoms
    become nodes keyed by their position in the atoms table (as a string),
    and external connection points become "-" and "+" nodes.

    Args:
        filename (str): The file to parse

    Returns:
        True if successful

    Raises:
        DabbleError if off file is malformed in various ways
    """
    unit = ""
    incmd = ""
    cmdidx = 1  # 1-based row number within the current section

    with open(filename, 'r') as fileh:
        for line in fileh:
            tokens = [i.strip(" \t\"\n") for i in line.split()]
            if not len(tokens) or not len(tokens[0]):
                continue

            # If we find a command, pull out the unit name then figure
            # out what section is being defined. Lines starting with !!
            # are not section headers.
            if tokens[0][0] == "!" and tokens[0][1] != "!":
                unit = tokens[0].split('.')[1]
                if tokens[0] == "!entry.%s.unit.atoms" % unit:
                    incmd = "addatoms"
                elif tokens[0] == "!entry.%s.unit.connectivity" % unit:
                    incmd = "addbonds"
                elif tokens[0] == "!entry.%s.unit.connect" % unit:
                    incmd = "addextrabonds"
                elif tokens[0] == "!entry.%s.unit.residues" % unit:
                    incmd = "name"
                else:
                    incmd = "skip"

                if not self.known_res.get(unit):
                    self.known_res[unit] = nx.Graph()
                graph = self.known_res[unit]
                cmdidx = 1
                continue

            # Add atoms command
            if incmd == "addatoms":
                # Define atom types if not present using element index.
                # The table is keyed by atom type (tokens[1]), so the
                # inferred element must be stored under the type as well,
                # not under the atom name.
                element = self.nodenames.get(tokens[1])
                if not element:
                    element = self.LEAP_ELEMENTS.get(
                        int(tokens[6]), "Other")
                    self.nodenames[tokens[1]] = element

                graph.add_node(
                    str(cmdidx),
                    type=tokens[1],
                    element=element,
                    resname=tokens[3],
                    residue=tokens[3],  # residue index, will be replaced
                    atomname=tokens[0])

            # Add bonds command
            elif incmd == "addbonds":
                node1 = graph.node.get(tokens[0])
                node2 = graph.node.get(tokens[1])
                if not node1 or not node2:
                    print(node1, node2)
                    print(graph.node.keys())
                    raise DabbleError(
                        "Can't parse bond for unit %s, file %s\n"
                        "Line was: %s" % (unit, filename, line))
                graph.add_edge(tokens[0], tokens[1])

            # Add externally bonded atoms command if there are actually
            # atoms, a 0 value here indicates no value. The - is listed
            # before the + so cmdidx is used to keep track of which one
            # we're on
            elif incmd == "addextrabonds" and tokens[0] != "0":
                if cmdidx == 1:
                    node1 = "-"
                else:
                    node1 = "+"
                graph.add_node(node1, atomname=node1, type="",
                               residue=node1, element="_join")
                if not graph.node.get(tokens[0]):
                    raise DabbleError("Can't parse extra residue bond for "
                                      "unit %s, file %s\nLine was: %s"
                                      % (unit, filename, line))
                graph.add_edge(node1, tokens[0])

            # Residue table: replace the numeric residue index on the
            # atoms with the residue name and mark them as "self"
            elif incmd == "name":
                members = [n for n in graph.nodes()
                           if graph.node[n].get("residue") == tokens[1]]
                # Sanity check residue name here
                if members and "*" in tokens[0]:
                    raise DabbleError(
                        "You have a common error in your "
                        ".off file '%s'.\n The residue name "
                        "is invalid. Please check the first "
                        "field in the unit.residue section."
                        % filename)
                for nod in members:
                    graph.node[nod]["resname"] = tokens[0]
                    graph.node[nod]["residue"] = "self"

            cmdidx += 1

    return True
def _rtf_to_graph(self, data, resname, patch=None): #pylint: disable=too-many-branches
    """
    Parses rtf text to a graph representation. If a graph to patch is
    provided, then patches a copy of that graph with this rtf data.

    Args:
        data (str): The rtf data for this residue or patch
        resname (str): Residue name, from earlier parsing
        patch (networkx graph): The graph to apply patches to, or None
            if just parsing a residue. Will not be modified.

    Returns:
        (networkx graph): Graph representation of molecule, or None if
            it could not be converted (invalid patch)

    Raises:
        DabbleError if a BOND/DOUBLE or CMAP line is malformed
        ValueError if a DELETE line appears outside of a patch
    """
    # Pass the graph positionally: the constructor's first parameter is
    # not called "data" in every networkx release, and as a keyword it
    # would be stored as a graph attribute instead of being copied.
    graph = nx.Graph(patch)
    is_patch = bool(patch)
    firstcmap = True

    for line in data.splitlines():
        tokens = [i.strip().upper() for i in line.split()]
        if not tokens:  # blank line
            continue
        keyword = tokens[0]

        # Atoms mean add node to current residue
        if keyword == "ATOM":
            # Patches can change atom type
            # Technically re-adding the node will just change the type and
            # not add a duplicate, but this is more correct and clear.
            if tokens[1] in graph.nodes():
                graph.node[tokens[1]]["type"] = tokens[2]
            else:
                graph.add_node(tokens[1], type=tokens[2],
                               residue="self", patched=is_patch)

        # Bond or double means add edge to residue graph
        elif keyword == "BOND" or keyword == "DOUBLE":
            if len(tokens) % 2 == 0:
                raise DabbleError("Unequal number of atoms in bond terms\n"
                                  "Line was:\n%s" % line)
            for txn in range(1, len(tokens), 2):
                if not _define_bond(graph, tokens[txn], tokens[txn + 1],
                                    is_patch):
                    return None

        # CMAP terms add edges. This makes amino acids work since the
        # next and previous amino acids aren't defined as bonds usually
        elif keyword == "CMAP":
            if len(tokens) == 1:
                # CMAP parameter section follows, ignore
                continue

            if firstcmap:
                # Remove all +- join nodes on patching
                joins = [n for n in graph.nodes()
                         if graph.node[n]["residue"] != "self"]
                graph.remove_nodes_from(joins)
                firstcmap = False

            if len(tokens) != 9:  # CMAP requires 2 dihedrals
                raise DabbleError("Incorrect CMAP line\n"
                                  "Line was:\n%s" % line)

            # Two dihedrals of four atoms each; bond consecutive atoms
            # within each dihedral
            atoms = tokens[1:]
            for start in (0, 4):
                for k in range(start, start + 3):
                    if not _define_bond(graph, atoms[k], atoms[k + 1],
                                        is_patch):
                        return None

        # Check for atom definitions
        elif keyword == "MASS":
            if self.nodenames.get(tokens[2]):
                logger.info("Skipping duplicate type %s", tokens[2])
            else:
                self.nodenames[tokens[2]] = \
                    MoleculeMatcher.get_element(float(tokens[3]))

        # Patches can delete atoms
        elif keyword == "DELETE" or keyword == "DELE":
            if not patch:
                raise ValueError("DELETE only supported in patches!\n"
                                 "Line was:\n%s" % line)

            # Sometimes delete has a number in front of the atom name
            try:
                if tokens[1] == "ATOM":
                    if tokens[2][0].isdigit():
                        tokens[2] = tokens[2][1:]
                    graph.remove_node(tokens[2])
                elif tokens[1] == "BOND":
                    if tokens[2][0].isdigit():
                        tokens[2] = tokens[2][1:]
                    if tokens[3][0].isdigit():
                        tokens[3] = tokens[3][1:]
                    graph.remove_edge(tokens[2], tokens[3])

            # Atom or bond did not exist, ie this patch is invalid
            except nx.NetworkXError:
                return None

    # Assign resname to all atoms
    nx.set_node_attributes(graph, name="resname", values=resname)

    # If we didn't patch, set the whole residue to unpatched atom attribute
    if not patch:
        nx.set_node_attributes(graph, name="patched", values=False)

    return graph