Beispiel #1
0
    def get_topologies(cls, forcefield, water_model):
        """
        Gets the paths to the default AMBER leaprc files for a water model.

        Args:
            forcefield (str): Force field requested. It does not change which
                leaprc files are returned by this method.
            water_model (str): Water model, one of "tip3", "tip4e" or "spce"

        Returns:
            (list of str): Absolute paths to the leaprc files, protein, lipid
                and gaff2 first, water model last

        Raises:
            DabbleError: if AMBERHOME is unset, if the water model is not
                supported, or if one of the leaprc files does not exist
        """
        # Every path below is built from AMBERHOME, so it is required no
        # matter which forcefield was passed in. Checking it only for
        # "amber" let other values die with a bare KeyError.
        amberhome = os.environ.get("AMBERHOME")
        if not amberhome:
            raise DabbleError("AMBERHOME must be set to use AMBER "
                              "forcefield!")

        water_leaprc = {
            "tip3": "leaprc.water.tip3p",
            "tip4e": "leaprc.water.tip4pew",
            "spce": "leaprc.water.spce",
        }
        if water_model not in water_leaprc:
            raise DabbleError("Water model '%s' not supported with AMBER" %
                              water_model)

        names = [
            "leaprc.protein.ff14SB",
            "leaprc.lipid14",
            "leaprc.gaff2",
            water_leaprc[water_model],
        ]

        # Resolve each leaprc name inside the AMBER installation
        ambpath = os.path.join(amberhome, "dat", "leap", "cmd")
        topologies = []
        for top in names:
            path = os.path.abspath(os.path.join(ambpath, top))
            if not os.path.isfile(path):
                raise DabbleError("AMBER forcefield files '%s' not found\n"
                                  "Dabble requires >= AmberTools16" % top)
            topologies.append(path)
        return topologies
Beispiel #2
0
    def _check_psf_output(self):
        """
        Verifies the structure written by psfgen contains every atom.

        psfgen can finish without errors or warnings while leaving atoms it
        could not match without coordinates. This loads the written
        "<outprefix>.pdb" file and looks for atoms with an occupancy of -1.

        Raises:
            DabbleError: if "<outprefix>.pdb" does not exist, or if any atom
                with occupancy -1 is found
        """
        pdbfile = '%s.pdb' % self.outprefix

        # The check is made on the pdb file; the message refers to the psf
        if not os.path.isfile(pdbfile):
            raise DabbleError("\nERROR: psf file failed to write.\n"
                              "       Please see log above.\n")

        # Load the written pdb and select atoms flagged with no occupancy
        loaded = molecule.load('pdb', pdbfile)
        missing = atomsel("occupancy=-1", molid=loaded)

        if missing:
            # One "resname+resid:name" line per offending atom
            entries = [
                "\t%s%s:%s\n" % (resname, resid, name)
                for resname, resid, name in zip(missing.resname,
                                                missing.resid,
                                                missing.name)
            ]
            raise DabbleError(
                "\nERROR: Couldn't find the following atoms.\n"
                + "".join(entries)
                + "Check if they are present in the original structure.\n")

        print("\nChecked output pdb/psf has all atoms present "
              "and correct.\n")
Beispiel #3
0
    def _set_water_names(self):
        """
        Renames all water residues and their atoms for the chosen water model.

        This is done directly rather than through the GraphMatcher because
        waters can have a fake bond. Crystal waters may come in with other
        names (HOH, etc), so everything selected by "water" is renamed.

        Raises:
            DabbleError: if the water model has no entry in WATER_NAMES, or
                if its residue name is not among the matcher's known residues
        """
        model = self.water_model
        if model not in self.WATER_NAMES:
            raise DabbleError("Unsupported water model '%s' with forcefield "
                              "'%s'" % (self.water_model, self.forcefield))

        watres = self.WATER_NAMES[model]
        if watres not in self.matcher.known_res:
            raise DabbleError("Water resname '%s' for model '%s' not defined "
                              "in topology files" % (watres, self.water_model))

        # Collect the distinct water residues; without any there is no work
        water_residues = set(atomsel("water").residue)
        if not water_residues:
            return

        watsel = "residue %s" % ' '.join(str(res) for res in water_residues)

        # Residue name first, then the oxygen, then the hydrogens. The
        # hydrogen names are repeated once per water residue.
        atomsel(watsel).resname = watres
        atomsel("%s and noh" % watsel).name = self.WATER_O_NAME
        hydrogens = atomsel("%s and not noh" % watsel)
        hydrogens.name = self.WATER_H_NAMES * len(water_residues)
Beispiel #4
0
    def get_disulfide(self, selection, molid):
        """
        Matches a selection against CYX if it is a disulfide-bonded cysteine.

        The selection must have exactly 3 atoms reported by
        get_extraresidue_atoms, and the known CYX residue graph must be a
        subgraph of the selection's graph.

        Args:
            selection (VMD atomsel): Selection to check
            molid (int): VMD molecule ID to look for the other CYS in

        Returns:
            resnames (dict int -> str) Residue name translation dictionary
            atomnames (dict int -> str) Atom name translation dictionary
            conect (int) Residue this one is connected to
            All three are None when the selection is not a disulfide cysteine.

        Raises:
            DabbleError: if CYX is not a known residue, or if none of the
                extra-residue atoms is a sulfur
        """
        no_match = (None, None, None)
        rgraph, _ = self.parse_vmd_graph(selection)

        # The reference residue has to be available to match against
        cyx = self.known_res.get("CYX")
        if not cyx:
            raise DabbleError("CYX undefined. Check forcefields!")

        # A disulfide cysteine has 3 join atoms
        externs = self.get_extraresidue_atoms(selection)
        if len(externs) != 3:
            return no_match

        # In AMBER format CYX should be a subgraph of this residue, as the
        # only difference is the _join bond
        matcher = isomorphism.GraphMatcher(rgraph, cyx,
                                           node_match=self._check_atom_match)
        if not matcher.subgraph_is_isomorphic():
            return no_match

        # TODO: Check there's only one match
        match = next(matcher.match())
        resmatch, nammatch = self._get_names_from_match(cyx, match)

        # The partner cysteine is reached through the bonded sulfur
        sulfurs = [
            idx for idx in externs
            if atomsel("index %d" % idx, molid=molid).element[0] == "S"
        ]
        if not sulfurs:
            raise DabbleError("3 bonded Cys %d isn't a valid disulfide!" %
                              selection.resid[0])

        partner = atomsel("index %d" % sulfurs[0], molid=molid)
        return (resmatch, nammatch, partner.residue[0])
Beispiel #5
0
    def get_lipid_tails(self, selection, head):
        """
        Obtains a name mapping for both lipid tails in a system given
        a selection describing the lipid and the indices of the head
        group atoms.

        Args:
            selection (VMD atomsel): Selection to pull tails from
            head (list of int): Atom indices in the head group of this lipid.
                Obtain with get_lipid_head function.

        Returns:
            (list of tuples that are dict int->str): Atom index to
                resname matched, atom index to atom name translation
                dictionaries for both tails

        Raises:
            DabbleError: If a tail could not be matched or if removing the
                head group does not leave exactly two connected components.
        """
        resname = selection.resname[0]
        rgraph = self.parse_vmd_graph(selection)[0]

        # Without the head group, each tail is its own connected component
        rgraph.remove_nodes_from(head)
        if nx.number_connected_components(rgraph) != 2:
            raise DabbleError("Incorrect number of tails attached to %s:%s!" %
                              (resname, selection.resid[0]))

        # Only tail residues that the loaded topologies actually define
        candidates = [name for name in self.LIPID_TAILS
                      if self.known_res.get(name)]

        taildicts = []
        for component in nx.connected_components(rgraph):
            tgraph = rgraph.subgraph(component)
            for matchname in candidates:
                graph = self.known_res.get(matchname)

                # Compare against the residue's own atoms only. Iterating
                # nodes(data=True) avoids Graph.node, which no longer exists
                # in networkx >= 2.4.
                truncated = nx.Graph(graph)
                truncated.remove_nodes_from(
                    [n for n, attrs in graph.nodes(data=True)
                     if attrs["residue"] != "self"])

                matcher = isomorphism.GraphMatcher(
                    tgraph, truncated, node_match=self._check_atom_match)
                if matcher.is_isomorphic():
                    match = next(matcher.match())
                    resmatch, nammatch = self._get_names_from_match(
                        graph, match)
                    taildicts.append((resmatch, nammatch))
                    break
            else:
                # No candidate tail definition matched this component
                raise DabbleError("Couldn't find a match for tail %s:%s" %
                                  (resname, selection.resid[0]))
        return taildicts
Beispiel #6
0
def check_out_type(value, outformat, forcefield, hmr=False):
    """
    Determines the output format to write.

    An explicitly requested format is returned as-is with no further checks.
    Otherwise the format is inferred from the extension of the filename.

    Args:
      value (str): Filename requested
      outformat (str): Format requested, or None to infer from filename
      forcefield (str): Force field requested
      hmr (bool): If hydrogen mass repartitioning is requested

    Returns:
      The output format

    Raises:
      DabbleError: if the extension is unsupported with this force field,
                   if hydrogen mass repartitioning is requested for an
                   inferred format other than amber, or if the amber force
                   field is requested and AMBERHOME is not set
    """
    if outformat is not None:
        print("Will output files in %s format" % outformat)
        return outformat
    print("Inferring output format from file extension")

    # Extensions that map to a format for any force field
    any_forcefield = {'mae': 'mae', 'pdb': 'pdb', 'dms': 'dms',
                      'dat': 'lammps'}
    # Extensions that are only valid with one of these force fields
    needs_forcefield = {'psf': 'charmm', 'prmtop': 'amber'}

    ext = value.rsplit('.')[-1]
    if ext in any_forcefield:
        out_fmt = any_forcefield[ext]
    elif ext in needs_forcefield and \
            forcefield in ("amber", "charmm", "opls"):
        out_fmt = needs_forcefield[ext]
    else:
        raise DabbleError("%s is an unsupported format with %s forcefield" %
                          (value, forcefield))

    if hmr and out_fmt != 'amber':
        raise DabbleError("HMR only supported with AMBER outputs!")

    # The amber force field needs an AMBER installation to be located
    if forcefield == "amber" and not os.environ.get("AMBERHOME"):
        raise DabbleError("AMBERHOME must be set to use AMBER forcefields!")

    return out_fmt
Beispiel #7
0
    def get_parameters(cls, forcefield, water_model):
        """
        Gets the default parameter files for a force field and water model.

        Args:
            forcefield (str): One of "charmm", "amber" or "opls"
            water_model (str): Water model. For charmm, "tip3", "tip4e" and
                "spce" each add a water parameter file; any other value adds
                none.

        Returns:
            (list of str): Parameter files. For amber this is whatever
                AmberWriter.get_parameters returns; otherwise each file name
                passed through cls._get_forcefield_path.

        Raises:
            DabbleError: if opls is requested with a water model other
                than "tip3"
            ValueError: if the force field is not recognized
        """
        if forcefield == "amber":
            from dabble.param import AmberWriter  # avoid circular dependency
            return AmberWriter.get_parameters(forcefield, water_model)

        if forcefield == "charmm":
            prms = [
                "par_all36m_prot.prm", "par_all36_cgenff.prm",
                "par_all36_lipid.prm", "par_all36_carb.prm",
                "par_all36_na.prm", "toppar_all36_prot_na_combined.str"
            ]
            water_file = {
                "tip3": "toppar_water_ions.str",
                "tip4e": "toppar_water_ions_tip4p_ew.str",
                "spce": "toppar_water_ions_spc_e.str",
            }.get(water_model)
            if water_file is not None:
                prms.append(water_file)

        elif forcefield == "opls":
            if water_model != "tip3":
                raise DabbleError("Only TIP3 water model supported for OPLS")
            prms = ["opls_aam.prm"]

        else:
            raise ValueError("Invalid forcefield: '%s'" % forcefield)

        return [cls._get_forcefield_path(par) for par in prms]
Beispiel #8
0
    def get_topologies(cls, forcefield, water_model):
        """
        Gets the default topology files for a force field and water model.

        Args:
            forcefield (str): One of "charmm", "opls" or "amber"
            water_model (str): Water model. For charmm, "tip3", "tip4e" and
                "spce" each add a water topology file; any other value adds
                none.

        Returns:
            (list of str): Topology files. For amber this is whatever
                AmberWriter.get_topologies returns; otherwise each file name
                passed through cls._get_forcefield_path.

        Raises:
            DabbleError: if opls is requested with a water model other
                than "tip3"
            ValueError: if the force field is not recognized
        """
        if forcefield == "amber":
            from dabble.param import AmberWriter  # avoid circular dependency
            return AmberWriter.get_topologies(forcefield, water_model)

        if forcefield == "charmm":
            topos = [
                "top_all36_caps.rtf", "top_all36_cgenff.rtf",
                "top_all36_prot.rtf", "top_all36_lipid.rtf",
                "top_all36_carb.rtf", "top_all36_na.rtf",
                "toppar_all36_prot_na_combined.str",
                "toppar_all36_prot_fluoro_alkanes.str"
            ]
            water_file = {
                "tip3": "toppar_water_ions.str",
                "tip4e": "toppar_water_ions_tip4p_ew.str",
                "spce": "toppar_water_ions_spc_e.str",
            }.get(water_model)
            if water_file is not None:
                topos.append(water_file)

        elif forcefield == "opls":
            if water_model != "tip3":
                raise DabbleError("Only TIP3 water model supported for OPLS")
            topos = ["opls_aam.rtf", "opls_aam_caps.rtf"]

        else:
            raise ValueError("Invalid forcefield: '%s'" % forcefield)

        return [cls._get_forcefield_path(top) for top in topos]
Beispiel #9
0
    def get_topologies(cls, forcefield, water_model):
        """
        Gets the topology files to use for a given force field.

        Charmm, opls and amber are handled by conversion, so their
        topologies come from the corresponding writer class.

        Args:
            forcefield (str): Force field requested
            water_model (str): Water model requested

        Returns:
            Whatever CharmmWriter.get_topologies (charmm, opls) or
            AmberWriter.get_topologies (amber) returns

        Raises:
            DabbleError: if the force field is anything else
        """
        # charmm and opls share the CharmmWriter topologies
        if forcefield in ("charmm", "opls"):
            return CharmmWriter.get_topologies(forcefield, water_model)

        if forcefield == "amber":
            return AmberWriter.get_topologies(forcefield, water_model)

        # Author's note: no forcefields are taken from GROMACS itself right
        # now, following an unresolved error report about the OPLS AA/M
        # gromacs implementation. A native path would have chosen an .ff
        # directory here (oplsaam.ff, gromos54a7.ff).
        raise DabbleError("Unsupported forcefield %s" % forcefield)
Beispiel #10
0
    def get_names(self, selection, print_warning=False):
        """
        Returns the matched residue name and an atom name dictionary.

        Runs the parent class matching, then collapses the per-atom residue
        names into a single one, since for CHARMM there is no concept of a
        unit and only one named residue is defined per topology.

        Args:
            selection (VMD atomsel): Selection to rename
            print_warning (bool): Debug output

        Returns:
            (str) resname matched
            (dict int->str) translation dictionary from index to atom name
            Both are None when the parent class found no match.

        Raises:
            DabbleError: if more than one residue name is matched
        """
        parent = super(CharmmMatcher, self)
        resnames, atomnames = parent.get_names(selection, print_warning)
        if not resnames:
            return (None, None)

        # All atoms must agree on one residue name since this is charmm
        distinct = set(resnames.values())
        if len(distinct) > 1:
            raise DabbleError("More than one residue name was returned as "
                              "belonging to a single residue in CHARMM matching."
                              " Not sure how this happened; something is really "
                              "really wrong. Residue was: %s:%d" %
                              (selection.resname[0],
                               selection.resid[0]))

        return (distinct.pop(), atomnames)
Beispiel #11
0
    def _parse_atp(self, filename):
        """
        Parses an atom types definition file, populating the elements
        table.

        Args:
            filename (str): .atp file to parse
        Returns:
            True on success
        """

        with open(filename, 'r') as fileh:
            lines = fileh.readlines()

        for line in lines:
            line = line.strip()
            tokens = [i.strip(" \t\n") for i in line.split()]
            if not tokens or not tokens[0]:
                continue

            # Comment lines start with ';'
            if tokens[0][0] == ";":
                continue

            try:
                element = self.get_element(float(tokens[1]))
            except:
                raise DabbleError("Problem parsing line:\n%s" % line)

            if self.nodenames.get(tokens[0]):
                logging.info("Already have element %s defined", element)
            else:
                self.nodenames[tokens[0]] = element

        return True
Beispiel #12
0
    def _assign_elements(self, graph):
        """
        Assigns elements to parsed in residues. Called after all
        topology files are read in. Element "Any" is assigned
        to atoms from other residues (+- atoms), since these are only
        defined by name.

        Args:
            graph (networkx graph): The graph to assign elements to

        Raises:
            ValueError if an atom type can't be assigned an element
        """
        # Now that all atom and mass lines are read, get the element for each atom
        for node, data in graph.nodes(data=True):
            if data.get('residue') != "self":
                element = "Any"
            else:
                element = self.nodenames.get(data.get('type'))

            if not element:
                self.write_dot(graph, "invalid_type.dot")
                raise DabbleError("Unknown atom type %s, name '%s'.\nDumping "
                                  "graph as invalid_type.dot"
                                  % (data.get("type"), node))
            data['element'] = element
Beispiel #13
0
    def _rename_atoms_amber(self):
        """
        Renames every non-water, non-lipid residue to match the topologies.

        Waters are renamed first. Each remaining residue is matched with
        the matcher; if that fails it is tried as a residue linked to
        another one. Finally the user field of all atoms is set to 1.0
        to track which things have been written.

        Returns:
            (set): The partner value that matcher.get_linkage returned for
                each bonded residue (presumably residue numbers - confirm
                against get_linkage)

        Raises:
            DabbleError: if a residue definition could not be found. The
                residue graph is written to rgraph.dot before raising.
        """
        self._set_water_names()

        pending = set(
            atomsel("not (water or %s)" % self.lipid_sel,
                    molid=self.molid).residue)
        total = len(pending)
        conect = set()  # Partners of noncanonically bonded residues

        while pending:
            # Report progress each time the remaining count hits a
            # multiple of 500
            remaining = len(pending)
            if remaining % 500 == 0:
                print("Renaming residues.... %.0f%%  \r" %
                      (100. - 100 * remaining / float(total)),
                      flush=True)

            residue = pending.pop()
            sel = atomsel("residue %s" % residue)
            resnames, atomnames = self.matcher.get_names(sel,
                                                         print_warning=False)

            if not resnames:
                # Not a plain residue; try it as a linkage to another one
                resnames, atomnames, other = self.matcher.get_linkage(
                    sel, self.molid)
                if not resnames:
                    rgraph = self.matcher.parse_vmd_graph(sel)[0]
                    self.matcher.write_dot(rgraph, "rgraph.dot")
                    raise DabbleError(
                        "ERROR: Could not find a residue definition "
                        "for %s:%s" % (sel.resname[0], sel.resid[0]))

                matched_name = list(resnames.values())[0]
                print(
                    "\tBonded residue: %s:%d -> %s" %
                    (sel.resname[0], sel.resid[0], matched_name))
                conect.add(other)

            # Apply the names that were found
            self._apply_naming_dictionary(resnames=resnames,
                                          atomnames=atomnames)

        atomsel('all').user = 1.0
        print("\n", flush=True)
        return conect
Beispiel #14
0
    def __init__(self, molid, **kwargs):
        """
        Creates a CHARMM writer

        Args:
            molid (int): VMD molecule ID of system to write
            tmp_dir (str): Directory for temporary files. Defaults to "."
            lipid_sel (str): Lipid selection string. Defaults to "lipid"
            hmr (bool): If hydrogen masses should be repartitioned. Defaults
                to False
            forcefield (str): Forcefield to use. Defaults to "charmm"
            water_model (str): Water model to use. Defaults to "tip3"
            extra_topos (list of str): Additional topology (.str, .off, .lib) to
                include.
            extra_params (list of str): Additional parameter sets (.str, .frcmod)
            override_defaults (bool): If set, omits default forcefield parameters.
            debug_verbose (bool): Prints additional output, like from psfgen.

        Raises:
            DabbleError: if hmr is set and the forcefield contains "charmm"
        """
        # The parent stores the common options on self
        super(CharmmWriter, self).__init__(molid, **kwargs)

        # psf generator object used for building
        self.psfgen = PsfGen()

        # Forcefield and water model decide the default files
        self.forcefield = kwargs.get("forcefield", "charmm")
        self.water_model = kwargs.get("water_model", "tip3")

        self.topologies = self.get_topologies(self.forcefield,
                                              self.water_model)
        self.parameters = self.get_parameters(self.forcefield,
                                              self.water_model)

        if "charmm" in self.forcefield and self.hmr:
            raise DabbleError("HMR not supported with CHARMM ff yet")

        # Overriding drops the defaults that were just looked up
        if self.override:
            self.topologies = []
            self.parameters = []

        # User-supplied extras (put in self by super __init__) go last
        self.topologies.extend(self.extra_topos)
        self.parameters.extend(self.extra_params)

        # The matcher is only needed with CHARMM-format topologies, not
        # when doing a conversion
        if any(name in self.forcefield for name in ("charmm", "opls")):
            self.matcher = CharmmMatcher(self.topologies)

        # Keep track of segment numbers for protein and other
        self.segint = 0
Beispiel #15
0
    def _parse_topology(self, filename):
        """
        Parses a gromacs forcefield directory. Reads all atom types from the
        atomtypes.atp file, parses all .itp topology files, and reads
        specbonds.dat for special bonds

        Args:
            filename (str): The folder to parse (should end in .ff)

        Returns:
            True if successful

        Raises:
            DabbleError if topology file is malformed in various ways
            DabbleError if gromacs installation cannot be found
        """
        # If .itp file only, just parse it. Otherwise, expect a directory
        if not os.path.isdir(filename):
            if os.path.splitext(filename)[1] == ".itp":
                return self._parse_itp(filename)
            raise DabbleError("GROMACS forcefields are specified by a "
                              "directory, got '%s'" % filename)

        # Ensure atomtypes.atp is present
        if not os.path.isfile(os.path.join(filename, "atomtypes.atp")):
            raise DabbleError("atomtypes.atp not present in GROMACS "
                              "forcefield directory '%s'" % filename)

        # Parse atom types first
        self._parse_atp(os.path.join(filename, "atomtypes.atp"))

        for file in os.listdir(filename):
            ext = os.path.splitext(file)[1]
            if ext == ".itp":
                self._parse_itp(os.path.join(filename, file))
            elif ext == ".rtp":
                self._parse_rtp(os.path.join(filename, file))

        return True
Beispiel #16
0
    def write(self, filename):
        """
        Writes the parameter and topology files.

        Force fields containing "charmm" or "opls" are written as an
        intermediate psf with CharmmWriter and then converted. Force fields
        containing "amber" are written as an intermediate prmtop with
        AmberWriter and then converted.

        Args:
            filename (str): File name to write. Gromacs suffix will be added.

        Raises:
            DabbleError: if the force field is none of the above. Native
                GROMACS force fields (gromos or opls) are currently
                unsupported.
        """
        self.outprefix = filename

        via_psf = "charmm" in self.forcefield or "opls" in self.forcefield
        if not via_psf and "amber" not in self.forcefield:
            # The native GROMACS path is not available. The statements
            # that used to follow this raise could never run and were
            # removed.
            raise DabbleError("Forcefield '%s' not supported for gromacs" %
                              self.forcefield)

        # Both intermediate writers take the same options
        writer_args = dict(molid=self.molid,
                           tmp_dir=self.tmp_dir,
                           lipid_sel=self.lipid_sel,
                           forcefield=self.forcefield,
                           water_model=self.water_model,
                           hmr=self.hmr,
                           extra_topos=self.extra_topos,
                           extra_params=self.extra_params,
                           override_defaults=self.override)

        if via_psf:
            psfgen = CharmmWriter(**writer_args)
            print("Writing intermediate psf")
            psfgen.write(self.outprefix)
            self._psf_to_gromacs()
        else:
            prmgen = AmberWriter(**writer_args)
            print("Writing intermediate prmtop")
            prmgen.write(self.outprefix)
            self._amber_to_gromacs()
Beispiel #17
0
    def __init__(self, topologies):
        """
        Initializes a graph parser with the given topology files
        as known molecules

        Args:
            topologies: Topology files, passed on to the parent class

        Raises:
            DabbleError: if AMBERHOME is not set
        """
        # AMBERHOME has to be set before anything is parsed
        amberhome = os.environ.get("AMBERHOME")
        if not amberhome:
            raise DabbleError("AMBERHOME must be set to use AmberMatcher")

        # The parent constructor parses the topologies
        super(AmberMatcher, self).__init__(topologies=topologies)

        # Also load the packaged water definition without the TIP3 bond
        hoh_lib = resource_filename(__name__, "parameters/hoh.lib")
        self._load_off(hoh_lib)
Beispiel #18
0
def load_solute(filename, tmp_dir):
    """
    Loads a molecule input file, guessing the format from the extension.

    mae, dms and mol2 files are loaded directly. A pdb file is loaded,
    written out as a temporary mae file in tmp_dir, and that mae file is
    loaded in its place so concatenation will work later.

    Args:
      filename (str): Filename to load
      tmp_dir (str): Directory to put temporary files in

    Returns:
      (int) VMD molecule ID that was loaded

    Raises:
      DabbleError if the filename is shorter than 3 characters or the
      filetype is currently unsupported
    """
    # Imported here so this function does not depend on a module-level import
    import os

    if len(filename) < 3:
        raise DabbleError("Cannot determine filetype of input file '%s'" %
                          filename)

    ext = filename.split(".")[-1]
    if ext in ('mae', 'dms', 'mol2'):
        # The extension is also the name of the file type to load
        return molecule.load(ext, filename)

    if ext == 'pdb':
        # mkstemp returns an open file descriptor along with the path.
        # Only the path is needed, so close the descriptor instead of
        # leaking it.
        handle, temp_mae = tempfile.mkstemp(suffix='.mae',
                                            prefix='dabble_input',
                                            dir=tmp_dir)
        os.close(handle)

        # Select from the molecule just loaded rather than whichever
        # molecule happens to be top
        pdb_molid = molecule.load('pdb', filename)
        atomsel('all', molid=pdb_molid).write('mae', temp_mae)
        molecule.delete(pdb_molid)
        return molecule.load('mae', temp_mae)

    raise DabbleError("Filetype '%s' currently unsupported "
                      "for input protein" % ext)
Beispiel #19
0
    def write(self, filename):
        """
        Writes the parameter and topology files

        The molecule being written is made the top molecule while writing.
        The previous top molecule is restored afterwards, including when
        writing fails.

        Args:
            filename (str): File name to write. File type suffix will be added.

        Raises:
            DabbleError: if the forcefield contains none of "amber",
                "charmm" or "opls"
        """
        self.outprefix = filename

        # Put our molecule on top
        old_top = molecule.get_top()
        molecule.set_top(self.molid)

        try:
            # Amber forcefield done with AmberWriter then conversion
            if "amber" in self.forcefield:
                # Avoid circular import by doing it here
                from dabble.param import AmberWriter
                prmtopgen = AmberWriter(molid=self.molid,
                                        tmp_dir=self.tmp_dir,
                                        forcefield=self.forcefield,
                                        water_model=self.water_model,
                                        hmr=self.hmr,
                                        lipid_sel=self.lipid_sel,
                                        extra_topos=self.extra_topos,
                                        extra_params=self.extra_params,
                                        override_defaults=self.override,
                                        debug_verbose=self.debug)
                prmtopgen.write(self.outprefix)
                self._prmtop_to_charmm()

            # Charmm and OPLS forcefields are both built with psfgen
            elif "charmm" in self.forcefield or "opls" in self.forcefield:
                self._run_psfgen()

            else:
                raise DabbleError("Unsupported forcefield '%s' for "
                                  "CharmmWriter" % self.forcefield)

            # Check output and finish up
            self._check_psf_output()
        finally:
            # Reset top molecule even if something above raised, so a
            # failed write does not leave the caller's top changed
            molecule.set_top(old_top)
Beispiel #20
0
def check_write_ok(filename, out_fmt, overwrite=False):
    """
    Checks if the output files for the requested format exist,
    and raises an error if the current options don't allow
    overwriting them.

    Args:
      filename (str): Output filename requested. Everything before the
        last '.' is used as the prefix of the files to look for.
      out_fmt (str): Output format requested. All intermediate
        files involved in writing to this format will be checked for
        existence. A .mae file is always checked.
      overwrite (bool): True if overwriting is allowed

    Returns:
      True if overwrite is True. False if overwriting is not allowed and
      none of the files exist.

    Raises:
      DabbleError: if overwriting is not allowed and any file exists
    """
    if overwrite is True:
        return True

    # Suffixes are written without a leading dot because the dot is added
    # when the filename is built below. 'dms' is accepted as well as
    # 'desmond' because that is what check_out_type returns.
    format_suffixes = {
        'desmond': ['dms'],
        'dms': ['dms'],
        'pdb': ['pdb'],
        'charmm': ['psf', 'pdb'],
        'amber': ['prmtop', 'inpcrd'],
        'gromacs': ['gro', 'top'],
        'lammps': ['dat'],
    }
    suffixes = ['mae'] + format_suffixes.get(out_fmt, [])

    prefix = '.'.join(filename.split('.')[:-1])
    candidates = ['%s.%s' % (prefix, sfx) for sfx in suffixes]
    exists = [path for path in candidates if os.path.isfile(path)]

    if exists:
        raise DabbleError("\nERROR: The following files exist and would be "
                          "overwritten:\n%s\n\tWon't overwrite unless -O "
                          "specified" % ' '.join(exists))

    return False
Beispiel #21
0
def set_cations(molid, element, filter_sel='none'):
    """
    Sets all of the existing K and Na atoms to the given cation

    Args:
      molid (int): VMD molecule ID to consider
      element (str in Na, K): Cation to convert to
      filter_sel (str): VMD atom selection string for atoms that should
        NOT be converted (it is negated in the selection)

    Raises:
      DabbleError if invalid cation specified
    """

    if element not in ['Na', 'K']:
        raise DabbleError("Invalid cation '%s'. "
                          "Supported cations are Na, K" % element)

    # Select within the requested molecule. Without molid the selection
    # silently used the top molecule while set_ion was given molid.
    cations = atomsel('element K Na and not (%s)' % filter_sel, molid=molid)

    # Materialize the selection before changing anything (presumably so
    # set_ion's changes cannot affect the iteration)
    for gid in tuple(cations):
        set_ion(molid, gid, element)
Beispiel #22
0
def get_net_charge(sel, molid):
    """
    Sums the charge field over an atom selection and returns the result
    rounded to the nearest integer.

    Args:
      sel (str): VMD atom selection to compute the charge of
      molid (int): VMD molecule id to select within

    Returns:
      (int): The rounded net charge of the selection, or 0 if the
        selection contains no atoms

    Raises:
      DabbleError: If the summed charge is more than 0.05 away from the
        nearest integer
    """

    charges = np.array(atomsel(sel, molid=molid).charge)
    natoms = charges.size
    if natoms == 0:
        return 0
    print("Calculating charge on %d atoms" % natoms)

    # A selection where every atom is neutral usually means the input
    # file carried no charge information, so tell the user about it
    every_atom_neutral = all(charges == 0)
    if every_atom_neutral:
        print("\nWARNING: All charges in selection are zero. "
              "Check the input file has formal charges defined!\n"
              "Selection was:\n%s\n"%sel)
        print(set(charges))

    # The total must be integral to within the tolerance
    total = sum(charges)
    nearest = round(total)
    deviation = abs(nearest - total)
    if deviation > 0.05:
        raise DabbleError("Total charge of %f is not integral within a "
                          "tolerance of 0.05. Check your input file."
                          % total)

    return int(nearest)
Beispiel #23
0
    def _parse_topology(self, filename): #pylint: disable=too-many-branches
        """
        Parses a topology file and pulls out the defined residues into
        graph representation.
        First pulls out atom types that are defined and updates nodenames,
        then pulls out defined residues and updates known_res.
        Also pulls out known patches as it goes

        Args:
            filename (str): The file to parse

        Returns:
            True if successful

        Raises:
            ValueError if topology file is malformed in various ways
        """
        resname = ""
        data = ""
        patch = False

        with open(filename, 'r') as fileh:
            for line in fileh:
                # Remove comments except "special" graphmatcher directives
                # This directive is only really used to parse the bond on NMA
                # that attaches to the previous residue, in order for its extra
                # connection to be properly registered since chamber fails
                # if a connection is listed twice
                if "!GraphMatcher:" in line:
                    line = line.replace("!GraphMatcher:", "")
                if "!" in line:
                    line = line[:line.index("!")]
                if not line:
                    continue
                tokens = [i.strip() for i in line.split()]
                if not tokens:
                    continue

                # Handle previous data
                if data and (tokens[0] == "RESI" or tokens[0] == "PRES"):
                    if patch:
                        self.patches[resname] = data
                    else:
                        self.known_res[resname] = self._rtf_to_graph(data, resname)
                    data = ""

                # Handle new residue definition
                if tokens[0] == "RESI":
                    resname = tokens[1]
                    # Only warn for too long str files
                    if len(resname) > 4 and filename.split('.')[-1] == "str":
                        raise DabbleError("Residue name '%s' too long for psfgen"
                                          " to parse. Max is 4 characters!"
                                          % resname)
                    patch = False
                    if self.known_res.get(resname):
                        logging.info("Skipping duplicate residue %s", resname)
                        # TODO define as a different residue name???
                        # Currently reads in first file's definition, ignores others
                        resname = "_skip"
                # PRES is a patch
                elif tokens[0] == "PRES":
                    resname = tokens[1] # prefix with _ so we can tell it's a patch
                    if len(resname) > 10:
                        raise DabbleError("Patch name '%s' too long for psfgen"
                                          " to parse. Max is 10 characters."
                                          % resname)
                    patch = True
                    if self.patches.get(resname):
                        logging.warning("Skipping duplicate patch %s", resname[1:])
                # Check for atom definitions
                elif tokens[0] == "MASS":
                    if self.nodenames.get(tokens[2]):
                        logger.info("Skipping duplicate type %s", tokens[2])
                    else:
                        self.nodenames[tokens[2]] = \
                                MoleculeMatcher.get_element(float(tokens[3]))
                elif resname and resname != "_skip":
                    data += ' '.join(tokens) + '\n'

        # Write out final residue
        if data:
            if patch:
                self.patches[resname] = data
            else:
                self.known_res[resname] = self._rtf_to_graph(data, resname)

        return True
Beispiel #24
0
    def get_disulfide(self, selstring, molid): #pylint: disable=too-many-locals
        """
        Decides whether the selected residue is a cysteine taking part in a
        disulfide bond, and if so builds the DISU patch and the atom naming
        for it by subgraph matching against the known amino acids.

        Args:
            selstring (str): Selection to check
            molid (int): VMD molecule of entire system (needed for disu partner)

        Returns:
            (str, Patch, dict) resname matched, patch object for psfgen,
                name translation dictionary. All three are None if the
                selection does not have exactly 3 extra-residue atoms, does
                not match any known amino acid, or best matches something
                other than CYS.

        Raises:
            DabbleError: if none of the extra-residue atoms is a sulfur
        """
        selection = atomsel(selstring, molid=molid)

        # A disulfide bonded cysteine has exactly 3 atoms joined to it
        rgraph, _ = self.parse_vmd_graph(selection)
        externs = self.get_extraresidue_atoms(selection)
        if len(externs) != 3:
            return (None, None, None)

        # Keep only this residue's own atoms for the comparison
        outside = [n for n in rgraph.nodes()
                   if rgraph.node[n]["residue"] != "self"]
        truncated = nx.Graph(rgraph)
        truncated.remove_nodes_from(outside)

        # Collect every amino acid that contains this residue as a subgraph
        matches = {}
        for matchname in self.AMINO_ACIDS:
            graph = self.known_res.get(matchname)
            if graph:
                matcher = isomorphism.GraphMatcher(
                    graph, truncated,
                    node_match=super(CharmmMatcher, self)._check_atom_match)
                if matcher.subgraph_is_isomorphic():
                    matches[matchname] = matcher.match()

        if not matches:
            return (None, None, None)

        # The candidate with the largest definition has to be cysteine
        matchname = max(matches, key=lambda res: len(self.known_res[res]))
        if matchname != "CYS":
            return (None, None, None)

        # The matcher maps name->idx because the template was the larger
        # graph, so flip it around to idx->name
        name_to_idx = next(matches[matchname])
        atomnames = {idx: name for name, idx in name_to_idx.items()}

        # The partner cysteine is reached through the bonded sulfur
        partners = [n for n in externs
                    if atomsel("index %d" % n, molid=molid).element[0] == "S"]
        if not partners:
            raise DabbleError("3 bonded Cys %d isn't a valid disulfide!"
                              % selection.resid[0])
        osel = atomsel("index %d" % partners[0], molid=molid)

        # Order by fragment, then by resid, so same DISU isn't listed twice
        fr1 = osel.fragment[0]
        fr2 = selection.fragment[0]
        if fr1 != fr2:
            partner_first = fr1 < fr2
        else:
            partner_first = osel.resid[0] < selection.resid[0]

        if partner_first:
            first, second = osel, selection
        else:
            first, second = selection, osel

        segids = [self.get_protein_segname(molid, first.fragment[0]),
                  self.get_protein_segname(molid, second.fragment[0])]
        resids = [first.resid[0], second.resid[0]]
        patch = Patch(name="DISU", segids=segids, resids=resids)

        return (matchname, patch, atomnames)
Beispiel #25
0
    def _parse_topology(self, filename):
        """
        Parses an amber topology file. More specifically, parses a leaprc
        file. The atom type definitions are in there as "addAtomTypes" command,
        and the topologies in the files specified with "loadOff" command.

        Args:
            filename (str): The file to parse

        Returns:
            True if successful

        Raises:
            DabbleError if topology file is malformed in various ways
        """
        if ".off" in filename or ".lib" in filename:
            self._load_off(filename)
        elif "frcmod" in filename:
            return self._load_params(filename)
        elif "leaprc" not in filename:
            raise DabbleError("AmberMatcher only parses .leaprc, .off, or "
                              ".frcmod topologies! Can't read topology '%s'" %
                              filename)

        # Set AMBER search path for lib files
        leapdir = os.path.join(os.environ["AMBERHOME"], "dat", "leap")

        incmd = ""
        with open(filename, 'r') as fileh:
            for line in fileh:
                if "#" in line:
                    line = line[:line.index("#")]
                if not line:
                    continue
                tokens = [i.strip(" \t'\"\n") for i in line.split()]
                if not tokens:
                    continue

                # addAtomTypes adds more atoms
                if not incmd and tokens[0].lower() == "addatomtypes":
                    incmd = "addatomtypes"
                elif incmd == "addatomtypes":
                    # Line should look like: { "OG" "O" "sp3" }
                    # we need the first 2 things for atom name and element
                    if tokens[0] == "}":  # done with atom type definition
                        incmd = ""
                        continue
                    if tokens[0] != "{" or tokens[-1] != "}":
                        raise DabbleError("Malformed line in %s: %s" %
                                          (filename, line))
                    if not tokens[2]:
                        self.pseudoatoms.append(tokens[1])
                        continue

                    if tokens[2] not in self.MASS_LOOKUP.values() and \
                       tokens[2] not in self.LEAP_ELEMENTS.values():
                        raise DabbleError("Unknown element in %s\n: %s" %
                                          (filename, tokens[2]))
                    self.nodenames[tokens[1]] = tokens[2]

                # loadOff loads a topology library
                # search in current directory first, then libdir
                elif not incmd and tokens[0].lower() == "loadoff":
                    if len(tokens) < 2:
                        raise DabbleError("Malformed line in %s: %s" %
                                          (filename, line))
                    if os.path.isfile(tokens[1]):
                        self._load_off(tokens[1])
                    else:
                        self._load_off(os.path.join(leapdir, "lib", tokens[1]))

                # loadamberparamsloads a frcmod file, which
                # may define ions
                elif not incmd and tokens[0].lower() == "loadamberparams":
                    if len(tokens) < 2:
                        raise DabbleError("Malformed line in %s: %s" %
                                          (filename, line))
                    if os.path.isfile(tokens[1]):
                        self._load_params(tokens[1])
                    else:
                        self._load_params(
                            os.path.join(leapdir, "parm", tokens[1]))

                # can source other leaprc files within this one
                # search current directory first, then amber one
                elif not incmd and tokens[0].lower() == "source":
                    if os.path.isfile(tokens[1]):
                        self._parse_topology(tokens[1])
                    else:
                        self._parse_topology(
                            os.path.join(leapdir, "cmd", tokens[1]))
                elif incmd:
                    raise DabbleError("Unclosed command in %s" % filename)

        return True
Beispiel #26
0
    def get_lipid_head(self, selection):
        """
        Obtains a name mapping for a lipid head group given a selection
        describing a possible lipid.

        Args:
            selection (VMD atomsel): Selection to set names for

        Returns:
            (dict int->str) Atom index to resname matched
            (dict int->str) Atom index to atom name matched up
            (int) Atom index corresponding to - direction tail
            All three are None if no known lipid head group is found
            inside the selection.

        Raises:
            DabbleError: if the atom joined to the - direction tail cannot
                be identified unambiguously
        """

        resname = selection.resname[0]
        rgraph = self.parse_vmd_graph(selection)[0]

        # Check if a lipid head group is part of this selection.
        # Remove _join residues from the head so that subgraph match can
        # be successfully completed
        matches = {}
        for matchname in self.LIPID_HEADS:
            graph = self.known_res.get(matchname)
            if not graph:
                continue
            truncated = nx.Graph(graph)
            truncated.remove_nodes_from([
                n for n in graph.nodes()
                if graph.node[n]["residue"] != "self"
            ])
            matcher = isomorphism.GraphMatcher(
                rgraph, truncated, node_match=self._check_atom_match)
            if matcher.subgraph_is_isomorphic():
                matches[matchname] = next(matcher.match())

        if not matches:
            return (None, None, None)

        # Prefer the head group with the largest definition
        matchname = max(matches, key=lambda x: len(self.known_res[x]))
        match = matches[matchname]
        graph = self.known_res.get(matchname)

        # Get naming dictionaries to return
        resmatch, nammatch = self._get_names_from_match(graph, match)

        # Find atom index on non-truncated graph that corresponds to the
        # - direction join atom. Necessary to figure out the order in which
        # to list the tails. The neighbours of "-" are gathered once rather
        # than once per matched atom.
        minus_neighbors = {edge[1] for edge in graph.edges(nbunch=["-"])}
        minusbnded = [idx for idx, name in match.items()
                      if name in minus_neighbors]
        if len(minusbnded) != 1:
            raise DabbleError(
                "Could not identify tail attached to lipid %s:%s!" %
                (resname, selection.resid[0]))

        # Look the atom up in the molecule the selection belongs to, not
        # in whichever molecule is currently on top. Its one bonded atom
        # that is outside the head group is the start of the tail.
        bonded = atomsel("index %s" % minusbnded[0],
                         molid=selection.molid).bonds[0]
        minusidx = [idx for idx in bonded if idx not in match]
        if len(minusidx) != 1:
            raise DabbleError(
                "Could not identify tail attached to lipid %s:%s!" %
                (resname, selection.resid[0]))

        return (resmatch, nammatch, minusidx[0])
Beispiel #27
0
    def _rtf_to_graph(self, data, resname, patch=None): #pylint: disable=too-many-branches
        """
        Parses rtf text to a graph representation. If a graph to patch
        is provided, then patches that graph with this rtf data

        Args:
            data (str): The rtf data for this residue or patch
            resname (str): Residue name, from earlier parsing
            patch (networkx graph): The graph to apply patches to,
              or None if just parsing a residue. Will not be modified.

        Returns:
            (networkx graph): Graph representation of molecule, or None
              if it could not be converted (invalid patch)

        Raises:
            DabbleError: if a bond line has an odd number of atoms, or a
              bond cannot be defined while parsing a residue
            ValueError: if a DELETE statement appears outside of a patch
        """

        def _strip_index(name):
            # Sometimes delete has a number in front of the atom name,
            # which is not part of the name used in the graph
            return name[1:] if name[0].isdigit() else name

        # The keyword for the input graph was renamed after networkx 2.1,
        # but it has always been the first positional argument. Passing it
        # positionally avoids comparing version strings, which is
        # lexicographic and would misorder e.g. "10.0" and "2.1".
        graph = nx.Graph(patch)
        is_patch = bool(patch)

        for line in data.splitlines():
            tokens = [i.upper() for i in line.split()]
            if not tokens:
                continue
            keyword = tokens[0]

            # Atoms mean add node to current residue
            if keyword == "ATOM":
                # Patches can change atom type
                # Technically re-adding the node will just change the type and
                # not add a duplicate, but this is more correct and clear.
                if tokens[1] in graph.nodes():
                    graph.node[tokens[1]]["type"] = tokens[2]
                else:
                    graph.add_node(tokens[1], type=tokens[2],
                                   atomname=tokens[1],
                                   residue="self",
                                   patched=is_patch)

            # Bond or double means add edge to residue graph
            elif keyword in ("BOND", "DOUBLE"):
                # Keyword plus pairs of atoms gives an odd token count
                if len(tokens) % 2 == 0:
                    raise DabbleError("Unequal number of atoms in bond terms\n"
                                      "Line was:\n%s" % line)
                for txn in range(1, len(tokens), 2):
                    node1 = tokens[txn]
                    node2 = tokens[txn+1]
                    if not _define_bond(graph, node1, node2, is_patch):
                        # A patch that cannot be applied is not an error
                        if is_patch:
                            return None
                        raise DabbleError("Could not bond atoms '%s' - '%s' "
                                          "when parsing rtf file.\n"
                                          "Line was:\n%s"
                                          % (node1, node2, line))

            # Check for atom definitions
            elif keyword == "MASS":
                if self.nodenames.get(tokens[2]):
                    logger.info("Skipping duplicate type %s", tokens[2])
                else:
                    self.nodenames[tokens[2]] = \
                            MoleculeMatcher.get_element(float(tokens[3]))

            # Patches can delete atoms
            elif keyword in ("DELETE", "DELE"):
                if not is_patch:
                    raise ValueError("DELETE only supported in patches!\n"
                                     "Line was:\n%s" % line)

                try:
                    if tokens[1] == "ATOM":
                        graph.remove_node(_strip_index(tokens[2]))
                    elif tokens[1] == "BOND":
                        graph.remove_edge(_strip_index(tokens[2]),
                                          _strip_index(tokens[3]))

                # Atom or bond did not exist, ie this patch is invalid
                except nx.NetworkXError:
                    return None

        # Assign resname to all atoms
        nx.set_node_attributes(graph, name="resname", values=resname)

        # If we didn't patch, set the whole residue to unpatched atom attribute
        # If we are patching, new atoms will have that attribute set when
        # they are added.
        if not is_patch:
            nx.set_node_attributes(graph, name="patched", values=False)

        return graph
Beispiel #28
0
    def _load_off(self, filename):
        """
        Parses an off format amber library file. Puts the resulting
        residue definitions into the known_res dictionary, one graph per
        unit, and records the element of any atom type that is not yet
        present in nodenames.

        Args:
            filename (str): The file to parse

        Returns:
            True if successful

        Raises:
            DabbleError: if a section header has no unit name, a bond or
                connect entry refers to an atom that was not defined, or a
                residue name contains "*"
        """
        # Sections of a unit that are read. Any other section is skipped.
        sections = {
            "atoms": "addatoms",
            "connectivity": "addbonds",
            "connect": "addextrabonds",
            "residues": "name",
        }
        unit = ""
        incmd = ""
        cmdidx = 1
        graph = None

        with open(filename, 'r') as fileh:
            for line in fileh:
                tokens = [i.strip(" \t\"\n") for i in line.split()]
                if not tokens or not tokens[0]:
                    continue

                # If we find a command, pull out the unit name then figure
                # out what section is being defined. Lines starting with
                # "!!" are not commands.
                if tokens[0].startswith("!") \
                   and not tokens[0].startswith("!!"):
                    fields = tokens[0].split('.')
                    if len(fields) < 2:
                        raise DabbleError("Malformed section header in %s\n"
                                          "Line was: %s" % (filename, line))
                    unit = fields[1]
                    header = "!entry.%s.unit." % unit
                    if tokens[0].startswith(header):
                        incmd = sections.get(tokens[0][len(header):], "skip")
                    else:
                        incmd = "skip"

                    if not self.known_res.get(unit):
                        self.known_res[unit] = nx.Graph()

                    graph = self.known_res[unit]
                    cmdidx = 1  # entries within a section count from 1
                    continue

                # Add atoms command
                if incmd == "addatoms":
                    # Define atom types if not present using element index.
                    # The element is stored under the atom type, which is
                    # the same key it is looked up by.
                    element = self.nodenames.get(tokens[1])
                    if not element:
                        element = self.LEAP_ELEMENTS.get(
                            int(tokens[6]), "Other")
                        self.nodenames[tokens[1]] = element

                    # Atoms are keyed by their position in the section,
                    # which is how the bond sections refer to them
                    graph.add_node(
                        str(cmdidx),
                        type=tokens[1],
                        element=element,
                        resname=tokens[3],
                        residue=tokens[3],  # residue index, will be replaced
                        atomname=tokens[0])

                # Add bonds command
                elif incmd == "addbonds":
                    if tokens[0] not in graph or tokens[1] not in graph:
                        raise DabbleError(
                            "Can't parse bond for unit %s, file %s\n"
                            "Line was: %s" % (unit, filename, line))
                    graph.add_edge(tokens[0], tokens[1])

                # Add externally bonded atoms command if there are actually
                # atoms, a 0 value here indicates no value. The - is listed before
                # the + so cmdidx is used to keep track of which one we're on
                elif incmd == "addextrabonds" and tokens[0] != "0":
                    if tokens[0] not in graph:
                        raise DabbleError("Can't parse extra residue bond for "
                                          "unit %s, file %s\nLine was: %s" %
                                          (unit, filename, line))
                    node1 = "-" if cmdidx == 1 else "+"
                    graph.add_node(node1,
                                   atomname=node1,
                                   type="",
                                   residue=node1,
                                   element="_join")
                    graph.add_edge(node1, tokens[0])

                # Residues section: replace the residue index stored on
                # each atom with the residue name and mark it as "self"
                elif incmd == "name":
                    for nod in (n for n in graph.nodes() if \
                              graph.node[n].get("residue") == tokens[1]):

                        # Sanity check residue name here
                        if "*" in tokens[0]:
                            raise DabbleError(
                                "You have a common error in your "
                                ".off file '%s'.\n The residue name "
                                "is invalid. Please check the first "
                                "field in the unit.residue section." %
                                filename)
                        graph.node[nod]["resname"] = tokens[0]
                        graph.node[nod]["residue"] = "self"

                cmdidx += 1

        return True
Beispiel #29
0
    def get_linkage(self, selection, molid):
        """
        Checks if the selection corresponds to a residue that is covalently
        bonded to some other residue other than the normal + or - peptide bonds.
        Matches atom names using a graph isomorphism between the residue
        without its externally bonded atoms and each known residue
        definition, and identifies the atom in the other residue.

        Args:
            selection (VMD atomsel): Selection to check
            molid (int): VMD molecule ID to look for other bonded residue in

        Returns:
            resnames (dict int -> str) Residue name translation dictionary
            atomnames (dict int -> str) Atom name translation dictionary
            partner (int) Index of the externally bonded atom that is not in
                a sequence-adjacent residue of the same chain
            All three are None if no known residue matches, or if there is
            not exactly one such partner atom.

        Raises:
            ValueError: if the selection spans more than one resid
            DabbleError: if several residues match equally well and exactly
                one canonical amino acid cannot be picked among them
        """
        # Sanity check selection corresponds to one resid
        resids = set(selection.resid)
        if len(resids) > 1:
            raise ValueError("Multiple resids in selection: %s" % resids)

        # Get externally bonded atoms
        externs = self.get_extraresidue_atoms(selection)

        # Create a subgraph with no externally bonded atoms for matching
        # Otherwise, extra bonded atom will prevent matches from happening
        noext, _ = self.parse_vmd_graph(selection)
        noext.remove_nodes_from([
            i for i in noext.nodes() if noext.node[i].get("residue") != "self"
        ])

        # Every known residue is a candidate. Each is compared without its
        # join atoms, on a copy so the stored definition is left intact.
        matches = {}
        for names in self.known_res:
            graph = self.known_res.get(names).copy()
            graph.remove_nodes_from([
                i for i in graph.nodes()
                if graph.node[i].get("residue") != "self"
            ])

            matcher = isomorphism.GraphMatcher(noext, graph, \
                        node_match=super(AmberMatcher, self)._check_atom_match)

            if matcher.is_isomorphic():
                matches[names] = matcher.match()

        if not matches:
            # Dump the unmatched residue graph to help with debugging
            self.write_dot(noext, "noext.dot")
            return (None, None, None)

        # Want minimally different thing, ie fewest _join atoms different
        def difference(res):
            return len(self.known_res[res]) - len(noext)

        minscore = min(difference(res) for res in matches)
        possible_matches = [res for res in matches
                            if difference(res) == minscore]

        # Prefer canonical amino acids here over weird other types, so
        # that terminal versions like NLYS are not chosen instead of LYS
        if len(possible_matches) > 1:
            canonicals = [res for res in possible_matches
                          if res in self.AMINO_ACIDS]
            if len(canonicals) == 1:
                print("\tPreferring canonical acid %s" % canonicals[0])
                matchname = canonicals.pop()
            else:
                raise DabbleError("Ambiguous bonded residue %s" %
                                  selection.resname[0])
        else:
            matchname = possible_matches.pop()

        # The selection graph was the first graph given to the matcher, so
        # the mapping is already idx -> template node and needs no inversion
        mapping = next(matches[matchname])
        graph = self.known_res.get(matchname)

        # Generate naming dictionaries to return
        nammatch = {}
        resmatch = {}
        for idx, node in mapping.items():
            attrs = graph.node[node]
            if attrs.get("residue") == "self":
                nammatch[idx] = attrs.get("atomname")
                resmatch[idx] = attrs.get("resname")

        # Find the atom in the other molecule. Atoms in the residue just
        # before or after this one on the same chain are ordinary
        # neighbours and are ignored. Each atom is selected only once.
        partners = []
        residue = selection.residue[0]
        chain = selection.chain[0]
        for num in externs:
            other = atomsel("index %d" % num, molid=molid)
            if other.chain[0] != chain \
               or other.residue[0] not in (residue + 1, residue - 1):
                partners.append(num)
        if len(partners) != 1:
            return (None, None, None)

        return (resmatch, nammatch, partners[0])
Beispiel #30
0
    def _find_residue_in_rtf(self, resname, molid):
        """
        Scans the input topology files to find a name match for the given
        residue name, then pulls out the atoms involved and checks that they
        are all present in the input coordinates, prompting the user to correct
        the names of atoms that could not be matched.

        Residue ID is used because there can be multiple copies of a residue
        with the same name, but only one has missing or extra atoms.

        Args:
          resname (str): Residue name to check
          molid (int): VMD molecule ID

        Returns:
          True if all matching was successful
          False if the residue name cannot be found
        """

        print("Finding residue name '%s'" % resname)
        for top in self.topologies:
            topfile = open(top, 'r')
            topo_atoms = _get_atoms_from_rtf(text=topfile.readlines(),
                                             resname=resname)
            # Use first definition found of this residue
            if topo_atoms:
                break
            topfile.close()
        if not topo_atoms:
            return False
        print("Successfully found residue %s in input topologies" % resname)

        # Match up atoms with python sets
        pdb_atoms = set(
            atomsel("resname '%s' and user 1.0" % resname, molid=molid).name)
        pdb_only = pdb_atoms - topo_atoms
        topo_only = topo_atoms - pdb_atoms

        # If uneven number of atoms, there are missing or additional atoms
        if len(pdb_atoms) > len(topo_atoms):
            raise DabbleError(
                "\nERROR: Cannot process modified residue %s.\n"
                "There are %d extra atoms in the input structure "
                "that are undefined in the topology file. The "
                "following atoms could not be matched and may "
                "either be misnamed, or additional atoms:\n"
                "[ %s ]\n" % (resname, len(pdb_atoms) - len(topo_atoms),
                              " ".join(pdb_only)))

        if len(topo_atoms) > len(pdb_atoms):
            raise DabbleError(
                "\nERROR: Cannot process modified residue %s.\n"
                "There are %d missing atoms in the input structure "
                "that are defined in the topology file. The "
                "following atoms could not be matched and may "
                "either be misnamed or deleted atoms:\n"
                "[ %s ]\n" % (resname, len(topo_atoms) - len(pdb_atoms),
                              " ".join(topo_only)))

        # Offer to rename atoms that couldn't be matched to the topology
        if pdb_only:
            print("\nWARNING: Having some trouble with modified residue %s.\n"
                  "         The following atom names cannot be matched up "
                  " to the input topologies. They are probably "
                  " misnamed.\n" % resname)
            print("         To help you, here are the atom names that "
                  " should be present according to the topology "
                  " but were not found:\n")
            print("         [ %s ]\n" % ' '.join([str(t) for t in topo_only]))
            print(" Please enter a valid name for each atom as "
                  "it appears or CTRL+D to quit..\n")
            for unmatched in pdb_only:
                print("Unmatched topology names: [ %s ]" % ' '.join(topo_only))

                newname = input("  %s  -> " % unmatched)
                while newname not in topo_only:
                    print("'%s' is not an available name in the topology."
                          "Please try again.\n" % newname)
                    newname = input("  %s  -> " % unmatched)
                atomsel("resname '%s' and user 1.0 and name '%s'" %
                        (resname, unmatched)).name = newname
                pdb_atoms = set(
                    atomsel("resname '%s' and user 1.0" % resname).name)
                topo_only = topo_atoms - pdb_atoms
                resname = newname

            # Recurse to check that everything is assigned correctly
            self._find_residue_in_rtf(resname, molid)
        print("Matched up all atom names for resname '%s'\n" % resname)
        return True