Esempio n. 1
0
    def get_disulfide(self, selection, molid):
        """
        Decides whether a residue is a cysteine taking part in a disulfide
        bond and, if it is, builds the renaming needed to turn it into CYX.

        The residue qualifies when get_extraresidue_atoms reports exactly
        three atoms for it and the known CYX graph is found as a subgraph
        of the residue's own graph.

        Args:
            selection (VMD atomsel): Residue to examine
            molid (int): VMD molecule ID the bonded partner atoms are
                looked up in, by atom index

        Returns:
            resnames (dict int -> str): Residue name translation dictionary
            atomnames (dict int -> str): Atom name translation dictionary
            conect (int): "residue" field of the first externally bonded
                sulfur atom

            All three values are None if the selection does not look like
            a disulfide bonded cysteine.

        Raises:
            DabbleError: If CYX is not among the known residues, or if none
                of the external atoms is a sulfur
        """
        residue_graph, _ = self.parse_vmd_graph(selection)

        # Without a CYX template there is nothing to match against
        cyx_graph = self.known_res.get("CYX")
        if not cyx_graph:
            raise DabbleError("CYX undefined. Check forcefields!")

        # A disulfide cysteine has exactly 3 atoms joined from outside
        externs = self.get_extraresidue_atoms(selection)
        if len(externs) != 3:
            return (None, None, None)

        # In the AMBER format CYX should be contained in this residue's
        # graph, the only difference being the _join bond
        matcher = isomorphism.GraphMatcher(residue_graph, cyx_graph,
                                           node_match=self._check_atom_match)
        if not matcher.subgraph_is_isomorphic():
            return (None, None, None)
        match = next(matcher.match())

        # Build both translation dictionaries in one pass, keeping only
        # template atoms that belong to the residue itself
        nammatch = {}
        resmatch = {}
        for idx, template_node in match.items():
            attrs = cyx_graph.node[template_node]
            if attrs.get("residue") == "self":
                nammatch[idx] = attrs.get("atomname")
                resmatch[idx] = attrs.get("resname")

        # The partner cysteine is whichever external atom is a sulfur
        sulfurs = []
        for idx in externs:
            element = atomsel("index %d" % idx, molid=molid).get("element")[0]
            if element == "S":
                sulfurs.append(idx)
        if not sulfurs:
            raise DabbleError("3 bonded Cys %d isn't a valid disulfide!" %
                              selection.get('resid')[0])

        partner = atomsel("index %d" % sulfurs[0], molid=molid)
        return (resmatch, nammatch, partner.get("residue")[0])
Esempio n. 2
0
    def get_lipid_tails(self, selection, head):
        """
        Builds name mappings for the two tails of a lipid.

        The head group atoms are removed from the lipid's graph; what
        remains must fall apart into exactly two connected pieces, each of
        which is matched against the known lipid tail residues.

        Args:
            selection (VMD atomsel): Selection to pull tails from
            head (list of int): Atom indices in the head group of this lipid.
                Obtain with get_lipid_head function.

        Returns:
            (list of tuples of dict int->str): For each tail, the atom
                index to resname dictionary followed by the atom index to
                atom name dictionary

        Raises:
            DabbleError: If the number of tails is not two, or if a tail
                matches none of the known tail residues
        """
        resname = selection.get('resname')[0]
        tails_graph = self.parse_vmd_graph(selection)[0]
        tails_graph.remove_nodes_from(head)

        if nx.number_connected_components(tails_graph) != 2:
            raise DabbleError("Incorrect number of tails attached to %s:%s!" %
                              (resname, selection.get('resid')[0]))

        taildicts = []
        for tgraph in nx.connected_component_subgraphs(tails_graph, copy=True):
            for matchname in self.lipid_tails:
                template = self.known_res.get(matchname)
                if not template:
                    # Tail name listed but no definition loaded for it
                    continue

                # Compare against the template's own atoms only
                own_atoms = nx.Graph(template)
                own_atoms.remove_nodes_from(
                    [n for n in template.nodes()
                     if template.node[n]["residue"] != "self"])
                matcher = isomorphism.GraphMatcher(
                    tgraph, own_atoms, node_match=self._check_atom_match)
                if not matcher.is_isomorphic():
                    continue

                match = next(matcher.match())
                nammatch = {}
                resmatch = {}
                for idx, template_node in match.items():
                    attrs = template.node[template_node]
                    if attrs.get("residue") == "self":
                        nammatch[idx] = attrs.get("atomname")
                        resmatch[idx] = attrs.get("resname")
                taildicts.append((resmatch, nammatch))
                break
            else:
                # Ran out of candidate tails without a match
                raise DabbleError("Couldn't find a match for tail %s:%s" %
                                  (resname, selection.get('resid')[0]))
        return taildicts
Esempio n. 3
0
    def get_names(self, selection, print_warning=False):
        """
        Returns the matched residue name and an atom name dictionary.

        Delegates to the parent class matcher, then collapses its
        per-atom residue names to a single name, since for CHARMM there is
        no concept of a unit and only one named residue is defined per
        topology.

        Args:
            selection (VMD atomsel): Selection to rename
            print_warning (bool): Debug output

        Returns:
            (str) resname matched
            (dict int->str) translation dictionary from index to atom name

            Both are None if the parent matcher found nothing.

        Raises:
            DabbleError if more than one residue name is matched
        """
        parent = super(CharmmMatcher, self)
        resnames, atomnames = parent.get_names(selection, print_warning)
        if not resnames:
            return (None, None)

        # All atoms must agree on one residue name since this is charmm
        distinct = set(resnames.values())
        if len(distinct) <= 1:
            return (distinct.pop(), atomnames)

        raise DabbleError(
            "More than one residue name was returned as "
            "belonging to a single residue in CHARMM matching."
            " Not sure how this happened; something is really "
            "really wrong. Residue was: %s:%d" %
            (selection.get("resname")[0], selection.get("resid")[0]))
Esempio n. 4
0
def _find_convertible_water_molecule(
        molid,  # pylint: disable=invalid-name
        water_sel='resname TIP3',
        min_ion_dist=5.0):
    """
    Picks, at random, a water atom that can be converted to an ion.

    Candidates are the non-hydrogen atoms matching water_sel with beta 1
    that are not "pbwithin" min_ion_dist of any beta 1 atom outside of
    water_sel.

    Args:
      molid (int): VMD molid to look at
      water_sel (str): VMD atom selection for water
      min_ion_dist (float): Minimum distance between ions

    Returns:
      (int) Atom index of a water oxygen that is convertible

    Raises:
      DabbleError if no convertible water molecules are found
    """

    inclusion_sel = 'beta 1 and noh and (%s)' % water_sel
    exclusion_sel = 'beta 1 and not (%s)' % water_sel
    sel_str = '(%s) and not pbwithin %f of (%s)' \
              % (inclusion_sel, min_ion_dist, exclusion_sel)
    candidates = atomsel(sel_str, molid).get("index")
    if not len(candidates):
        # Report the selection text; the candidate list itself is empty
        raise DabbleError("No convertible water molecules found in %s" %
                          sel_str)

    # random.randint includes its upper bound, so indexing with
    # randint(0, len) could run one past the end. choice cannot.
    return random.choice(candidates)
Esempio n. 5
0
    def _assign_elements(self, graph):
        """
        Sets the 'element' attribute on every node of a residue graph.

        Nodes belonging to the residue itself are looked up by their
        'type' attribute. Nodes from other residues (+- atoms) only have a
        name, so the lookup key is that name with digits and +/- signs
        stripped out. Lookups go through self.nodenames.

        Args:
            graph (networkx graph): The graph to assign elements to

        Raises:
            DabbleError if an atom type can't be assigned an element
        """
        for node, data in graph.nodes(data=True):
            if data.get('residue') == "self":
                typestr = data.get('type')
            else:
                # Atoms from neighboring residues: derive the key from the
                # node name
                kept = [
                    char for char in node
                    if not (char.isdigit() or char == "+" or char == "-")
                ]
                typestr = ''.join(kept)

            element = self.nodenames.get(typestr)
            if element:
                data['element'] = element
                continue

            raise DabbleError("Unknown atom type %s, name '%s'" %
                              (typestr, node))
Esempio n. 6
0
def check_out_type(value, forcefield, hmr=False):
    """
    Works out the output format from the requested filename's extension
    and checks it is usable with the requested forcefield.

    Args:
      value (str): Filename requested
      forcefield (str): Force field requested
      hmr (bool): If hydrogen mass repartitioning is requested

    Returns:
      (str) The requested output format: one of 'mae', 'pdb', 'dms',
        'charmm' or 'amber'

    Raises:
      DabbleError: if the extension / forcefield combination is
        unsupported, if hydrogen mass repartitioning is requested for a
        non-amber output, or if the amber forcefield is requested while
        the AMBERHOME environment variable is unset
    """

    extension = value.rsplit('.')[-1]
    prmtop_forcefields = ["amber", "charmm", "charmm36", "charmm36m"]

    if extension in ('mae', 'pdb', 'dms'):
        # Plain structure formats are named after their extension
        out_fmt = extension
    elif extension == 'psf' and "charmm" in forcefield:
        out_fmt = 'charmm'
    elif extension == 'prmtop' and forcefield in prmtop_forcefields:
        out_fmt = 'amber'
    else:
        raise DabbleError("%s is an unsupported format with %s forcefield" %
                          (value, forcefield))

    if hmr and out_fmt != 'amber':
        raise DabbleError("HMR only supported with AMBER outputs!")

    # Check if amber forcefield can be used
    if forcefield == "amber":
        if not os.environ.get("AMBERHOME"):
            raise DabbleError(
                "AMBERHOME must be set to use AMBER forcefields!")

    return out_fmt
Esempio n. 7
0
    def _rename_atoms_amber(self):
        """
        Matches up atom names with those in the provided topologies and
        sets the atom and residue names correctly in the built molecule.
        Handles all non-lipid atoms. Sets the user field of all atoms to 1.0
        to track which things have been written.

        Every selection is made explicitly in self.molid, so the result
        does not depend on which molecule happens to be top in VMD.

        Returns:
            (set): The third value returned by the matcher's get_linkage
                for each residue that had to be matched as a bonded
                residue

        Raises:
            DabbleError if a residue definition could not be found
        """

        nonlips = set(
            atomsel("not (%s)" % self.lipid_sel,
                    molid=self.molid).get("residue"))
        n_res = len(nonlips)
        conect = set()  # Atom indices bound to noncanonical residues
        while nonlips:
            # Progress report, only when the remaining count hits a
            # multiple of 500
            if len(nonlips) % 500 == 0:
                sys.stdout.write("Renaming residues.... %.0f%%  \r" %
                                 (100. - 100 * len(nonlips) / float(n_res)))
                sys.stdout.flush()

            residue = nonlips.pop()
            # Residue numbers came from self.molid, so select there too
            sel = atomsel("residue %s" % residue, molid=self.molid)
            resnames, atomnames = self.matcher.get_names(sel,
                                                         print_warning=False)

            # Check if it's a linkage to another amino acid
            if not resnames:
                resnames, atomnames, other = self.matcher.get_linkage(
                    sel, self.molid)
                if not resnames:
                    # Dump the unmatched residue graph to aid debugging
                    rgraph = self.matcher.parse_vmd_graph(sel)[0]
                    write_dot(rgraph, "rgraph.dot")
                    raise DabbleError(
                        "ERROR: Could not find a residue definition "
                        "for %s:%s" %
                        (sel.get("resname")[0], sel.get("resid")[0]))

                print("\tBonded residue: %s:%d -> %s" %
                      (sel.get("resname")[0], sel.get("resid")[0],
                       list(resnames.values())[0]))
                conect.add(other)

            # Do the renaming
            self._apply_naming_dictionary(resnames, atomnames)

        atomsel('all', molid=self.molid).set('user', 1.0)
        sys.stdout.write("\n")
        return conect
Esempio n. 8
0
def load_solute(filename, tmp_dir):
    """
    Loads a molecule input file, guessing the format from the extension.

    mae, dms and mol2 files are loaded directly. pdb files are loaded,
    written back out as a temporary mae file in tmp_dir, and reloaded from
    that file.

    Args:
      filename (str): Filename to load
      tmp_dir (str): Directory to put temporary files in

    Returns:
      (int) VMD molecule ID that was loaded

    Raises:
      DabbleError if the filename is too short to carry an extension or
        the filetype is currently unsupported
    """
    import os  # only needed to release the mkstemp file descriptor

    if len(filename) < 3:
        raise DabbleError("Cannot determine filetype of input file '%s'" %
                          filename)

    ext = filename.split(".")[-1]
    if ext in ('mae', 'dms', 'mol2'):
        # The VMD plugin name is the same as the extension for these
        return molecule.load(ext, filename)

    if ext == 'pdb':
        # Need to convert to MAE so concatenation will work later.
        # mkstemp hands back an open descriptor along with the path; only
        # the path is wanted, so close the descriptor instead of leaking it
        handle, temp_mae = tempfile.mkstemp(suffix='.mae',
                                            prefix='dabble_input',
                                            dir=tmp_dir)
        os.close(handle)

        molid = molecule.load('pdb', filename)
        # Select explicitly from the molecule just loaded
        atomsel('all', molid=molid).write('mae', temp_mae)
        molecule.delete(molid)
        return molecule.load('mae', temp_mae)

    raise DabbleError("Filetype '%s' currently unsupported "
                      "for input protein" % ext)
Esempio n. 9
0
    def _remove_xy_lipids(self, molid):
        """
        Removes residues in the +-XY direction in the system. Used to chop off
        lipids that are protruding outside of the box dimensions.

        A lipid with atoms outside the box is only removed when the center
        of its heavy atoms is also outside the box. Non-lipid residues
        outside the box are removed as well, except for anything matching
        self.solute_sel.

        Args:
          molid (int): VMD molecule id to use

        Returns:
          (int) value returned by _remove_residues, documented as the
            number of atoms deleted

        Raises:
          DabbleError if the 'lipid' only contains hydrogens
        """

        # Select residues that are outside the box
        half_x_size = self.size[0] / 2.0
        half_y_size = self.size[1] / 2.0
        box_sel_str = 'abs(x) > %f or abs(y) > %f' % (half_x_size, half_y_size)

        # Identify lipids that have some part outside of the box
        suspicious_lipid_residues = list(set(atomsel(
            '(%s) and (%s)' % (self.opts['lipid_sel'], box_sel_str),
            molid=molid).get('residue')))
        bad_lipids = []

        # Delete lipids whose center is too far out of the box, keep others
        for i in suspicious_lipid_residues:
            lipid_center = atomsel('noh and residue %s' % str(i),
                                   molid=molid).center()
            # Sanity check
            if not len(lipid_center):
                raise DabbleError(
                    "No heavy atoms found in suspicious residue %s. "
                    "Check your input file." % str(i))

            if abs(lipid_center[0]) > half_x_size or \
               abs(lipid_center[1]) > half_y_size:
                bad_lipids.append(i)

        # A bare 'residue ' with no numbers after it is not a meaningful
        # selection, so select nothing when no lipid needs to go
        if bad_lipids:
            lipid_headgroup_sel = 'residue ' + ' '.join(
                str(l) for l in bad_lipids)
        else:
            lipid_headgroup_sel = 'none'

        # Do the deletion
        removal_sel_str = '(%s) or not (%s)' % (lipid_headgroup_sel,
                                                self.opts['lipid_sel'])
        total = _remove_residues(
            'noh and (%s) and (%s) and not (%s)' %
            (box_sel_str, removal_sel_str, self.solute_sel),
            molid=molid)
        return total
Esempio n. 10
0
    def convert_ions(self, salt_conc, cation, molid):
        """
        Calculates the charge of the molecule and adds salt ions to get the
        desired concentration by converting water molecules to salt

        Args:
          salt_conc (float): Desired salt concentration in M
          cation (str): Cation to add, either Na or K
          molid (int): VMD molecule id to consider

        Returns:
          (int) number of ions added

        Raises:
          DabbleError if invalid cation is specified
        """
        # Validate the cation that is actually going to be added. The
        # stored option may differ from the argument, or may be absent.
        if cation not in ['Na', 'K']:
            raise DabbleError("Invalid cation '%s'" % cation)

        # Give existing cations correct nomenclature
        molutils.set_cations(molid, cation)

        # Calculate number of salt ions needed
        (pos_ions_needed, neg_ions_needed, num_wat, total_cations,
         total_anions, cation_conc, anion_conc) = \
            molutils.get_num_salt_ions_needed(molid, salt_conc, cation=cation)

        print("Solvent will be %d waters, %d %s (%.3f M), %d Cl (%.3f M)" %
              (num_wat, total_cations, cation, cation_conc,
               total_anions, anion_conc))

        print("Converting %d waters to %d %s ions and %d Cl ions..." %
              (pos_ions_needed + neg_ions_needed, pos_ions_needed, cation,
               neg_ions_needed))

        # Add the ions
        for _ in range(pos_ions_needed):
            add_salt_ion(cation, molid)
        for _ in range(neg_ions_needed):
            add_salt_ion('Cl', molid)

        return pos_ions_needed + neg_ions_needed
Esempio n. 11
0
def set_cations(molid, element, filter_sel='none'):
    """
    Sets all existing K and Na atoms in a molecule to the given cation

    Args:
      molid (int): VMD molecule ID to consider
      element (str in Na, K): Cation to convert to
      filter_sel (str): VMD atom selection string for atoms to leave
        alone. Atoms matching it are excluded from the conversion.

    Raises:
      DabbleError if invalid cation specified
    """

    if element not in ['Na', 'K']:
        raise DabbleError("Invalid cation '%s'. "
                          "Supported cations are Na, K" % element)

    # Select within the requested molecule, the same one set_ion acts on,
    # rather than whichever molecule is currently top
    existing = atomsel('element K Na and not (%s)' % filter_sel, molid=molid)

    # Snapshot the selection first; presumably set_ion changes the fields
    # the selection is based on
    for gid in tuple(existing):
        set_ion(molid, gid, element)
Esempio n. 12
0
def get_net_charge(sel, molid):
    """
    Sums the charge field over an atom selection and rounds the total to
    an integer.

    Args:
      sel (str): VMD atom selection to compute the charge of
      molid (int): VMD molecule id to select within

    Returns:
      (int): The rounded net charge of the selection, 0 for an empty
        selection

    Raises:
      DabbleError: If the total is more than 0.05 away from an integer
    """

    charges = np.array(atomsel(sel, molid=molid).get('charge'))
    n_atoms = charges.size
    if n_atoms == 0:
        return 0
    print("Calculating charge on %d atoms" % n_atoms)

    # Warn (but carry on) when no atom carries any charge at all
    if not any(charges != 0):
        print("\nWARNING: All charges in selection are zero. "
              "Check the input file has formal charges defined!\n"
              "Selection was:\n%s\n" % sel)
        print(set(charges))

    # Round to the nearest integer and make sure that was reasonable
    total = sum(charges)
    nearest = round(total)
    deviation = abs(nearest - total)
    if deviation > 0.05:
        raise DabbleError("Total charge of %f is not integral within a "
                          "tolerance of 0.05. Check your input file." %
                          total)

    return int(nearest)
Esempio n. 13
0
    def get_lipid_head(self, selection):
        """
        Builds a name mapping for the head group of a possible lipid.

        Every known lipid head is tried as a subgraph of the selection's
        graph, and the head whose known residue graph is largest wins.

        Args:
            selection (VMD atomsel): Selection to set names for

        Returns:
            (dict int->str) Atom index to resname matched
            (dict int->str) Atom index to atom name matched up
            (int) Index of the one atom outside the matched head that is
                bonded to the head atom joined to "-"

            All three are None if no head group matches.

        Raises:
            DabbleError: if the tail attached in the - direction cannot be
                identified unambiguously
        """

        resname = selection.get('resname')[0]
        rgraph = self.parse_vmd_graph(selection)[0]

        # Try each defined head group. Atoms from other residues are
        # dropped from the template so the subgraph match can succeed.
        matches = {}
        for candidate in self.lipid_heads:
            template = self.known_res.get(candidate)
            if not template:
                continue
            own_atoms = nx.Graph(template)
            own_atoms.remove_nodes_from(
                [n for n in template.nodes()
                 if template.node[n]["residue"] != "self"])
            matcher = isomorphism.GraphMatcher(
                rgraph, own_atoms, node_match=self._check_atom_match)
            if matcher.subgraph_is_isomorphic():
                matches[candidate] = next(matcher.match())

        if not matches:
            return (None, None, None)

        # Keep the match whose full residue definition is the biggest
        matchname = max(matches, key=lambda name: len(self.known_res[name]))
        match = matches[matchname]
        graph = self.known_res.get(matchname)

        # Generate naming dictionaries to return
        nammatch = {}
        resmatch = {}
        for idx, template_node in match.items():
            attrs = graph.node[template_node]
            if attrs.get("residue") == "self":
                nammatch[idx] = attrs.get("atomname")
                resmatch[idx] = attrs.get("resname")

        # On the full (non-truncated) template, find which head atom is
        # bonded to the "-" join atom. This decides the order in which
        # the tails get listed.
        minus_neighbors = [edge[1] for edge in graph.edges(nbunch=["-"])]
        minusbnded = [idx for idx, template_node in match.items()
                      if template_node in minus_neighbors]
        if len(minusbnded) != 1:
            raise DabbleError(
                "Could not identify tail attached to lipid %s:%s!" %
                (resname, selection.get('resid')[0]))

        # Its one bonded neighbor outside of the head is the tail atom
        bonded = atomsel("index %s" % minusbnded[0]).bonds[0]
        minusidx = [idx for idx in bonded if idx not in match.keys()]
        if len(minusidx) != 1:
            raise DabbleError(
                "Could not identify tail attached to lipid %s:%s!" %
                (resname, selection.get('resid')[0]))

        return (resmatch, nammatch, minusidx[0])
Esempio n. 14
0
    def get_linkage(self, selection, molid):
        """
        Checks if the selection corresponds to a residue that is covalently
        bonded to some other residue other than the normal + or - peptide
        bonds, and matches its atom names against the known residues.

        The residue's own atoms are compared, by graph isomorphism, with
        the own atoms of every known residue. Among the matches, the one
        whose full definition differs least in size is used, with
        canonical amino acids preferred on a tie.

        Args:
            selection (VMD atomsel): Selection to check
            molid (int): VMD molecule ID to look for other bonded residue in

        Returns:
            resnames (dict int -> str) Residue name translation dictionary
            atomnames (dict int -> str) Atom name translation dictionary
            conect (int) Index of the single externally bonded atom that
                is in another chain or in a non-adjacent residue

            All three are None if nothing matches or if there is not
            exactly one such external atom.

        Raises:
            ValueError: If the selection spans more than one resid
            DabbleError: If several residues match equally well and the
                tie cannot be broken
        """
        # Sanity check selection corresponds to one resid
        resids = set(selection.get("resid"))
        if len(resids) > 1:
            raise ValueError("Multiple resids in selection: %s" % resids)

        # Get externally bonded atoms
        externs = self.get_extraresidue_atoms(selection)

        # Match on the residue's own atoms only, otherwise the extra
        # bonded atom would prevent matches from happening
        noext, _ = self.parse_vmd_graph(selection)
        foreign = [
            n for n in noext.nodes() if noext.node[n].get("residue") != "self"
        ]
        noext.remove_nodes_from(foreign)

        # Collect every known residue whose own atoms match
        matches = {}
        for names in self.known_res:
            template = self.known_res.get(names).copy()
            template.remove_nodes_from([
                n for n in template.nodes()
                if template.node[n].get("residue") != "self"
            ])

            matcher = isomorphism.GraphMatcher(
                noext, template,
                node_match=super(AmberMatcher, self)._check_atom_match)
            if matcher.is_isomorphic():
                matches[names] = matcher.match()

        if not matches:
            write_dot(noext, "noext.dot")
            return (None, None, None)

        # Want the minimally different thing, ie the fewest extra atoms in
        # the full definition relative to what was matched
        scores = dict(
            (name, len(self.known_res[name]) - len(noext)) for name in matches)
        minscore = min(scores.values())
        possible_matches = [
            name for name in matches if scores[name] == minscore
        ]

        if len(possible_matches) == 1:
            matchname = possible_matches[0]
        else:
            # Prefer canonical amino acids here over weird other types
            canonicals = [
                name for name in possible_matches if name in self._acids
            ]
            if len(canonicals) != 1:
                raise DabbleError("Ambiguous bonded residue %s" %
                                  selection.get("resname")[0])
            print("\tPreferring canonical acid %s" % canonicals[0])
            matchname = canonicals[0]

        # First mapping from the stored match generator
        mapping = next(matches[matchname])
        graph = self.known_res.get(matchname)

        # Generate naming dictionaries to return
        nammatch = {}
        resmatch = {}
        for idx, template_node in mapping.items():
            attrs = graph.node[template_node]
            if attrs.get("residue") == "self":
                nammatch[idx] = attrs.get("atomname")
                resmatch[idx] = attrs.get("resname")

        # An external atom is a linkage partner unless it sits in the
        # directly preceding or following residue of the same chain
        resid = selection.get("residue")[0]
        chain = selection.get("chain")[0]
        partners = []
        for num in externs:
            other = atomsel("index %d" % num, molid=molid)
            rid = other.get("residue")[0]
            same_chain = other.get("chain")[0] == chain
            adjacent = rid == resid + 1 or rid == resid - 1
            if not (same_chain and adjacent):
                partners.append(num)

        if len(partners) != 1:
            return (None, None, None)

        return (resmatch, nammatch, partners[0])
Esempio n. 15
0
    def _write_protein_blocks(self, molid, frag):
        """
        Renames the residues and atoms of one protein fragment, saves the
        fragment as a pdb file in self.tmp_dir, and writes the matching
        segment definition to the psfgen input file (self.file).

        Args:
            molid (int): VMD molecule ID of renumbered protein
            frag (str): Fragment to write

        Returns:
            (set of str): Patches to add to the psfgen input file
                after all proteins have been loaded

        Raises:
            DabbleError: If a residue matches no residue definition,
                disulfide, or patch
        """

        print("Setting protein atom names")

        # Selections below do not pass a molid, so make ours the top one
        old_top = molecule.get_top()
        molecule.set_top(molid)
        seg = "P%s" % frag
        patches = set()      # single-residue patches, written here
        extpatches = set()   # patches returned for the caller to apply

        frag_residues = list(
            set(atomsel("fragment '%s'" % frag).get('residue')))
        for residue in frag_residues:
            sel = atomsel('residue %s' % residue)
            resid = sel.get('resid')[0]

            n_external = len(self.matcher.get_extraresidue_atoms(sel))
            if n_external >= 3:
                # Too many outside bonds for a plain amino acid, so see if
                # it's a disulfide bond participant
                newname, patchline, atomnames = self.matcher.get_disulfide(
                    "residue %d" % residue, frag, molid)
                if newname:
                    extpatches.add(patchline)
            else:
                # 1 or 2 bonds: try to match a single amino acid
                newname, atomnames = self.matcher.get_names(
                    sel, print_warning=False)

            # Couldn't find a match. See if it's a patched residue
            if not newname:
                newname, patch, atomnames = self.matcher.get_patches(sel)
                if not newname:
                    # Nothing left to try
                    raise DabbleError("Couldn't find a patch for %s:%s"
                                      % (sel.get('resname')[0], resid))
                patches.add("patch %s %s:%d\n" % (patch, seg, resid))

            # Do the renaming, skipping names that contain + or -
            for idx, name in atomnames.items():
                atom = atomsel('index %s' % idx)
                current = atom.get('name')[0]
                if current != name and not ("+" in name or "-" in name):
                    atom.set('name', name)
            sel.set('resname', newname)

        # Save protein chain in the correct order
        filename = self.tmp_dir + '/psf_protein_%s.pdb' % seg
        _write_ordered_pdb(filename, "fragment '%s'" % frag, molid)
        n_written = len(atomsel("fragment %s" % frag))
        print("\tWrote %d atoms to the protein segment %s"
              % (n_written, seg))

        # Now write to psfgen input file
        segment_block = '''
        set protnam %s
        segment %s {
          first none
          last none
          pdb $protnam
        }
        ''' % (filename, seg)
        self.file.write(segment_block)

        print("Applying the following single-residue patches to P%s:\n" % frag)
        print("\t%s" % "\t".join(patches))
        self.file.write(''.join(patches))
        self.file.write("\n")

        self.file.write("coordpdb $protnam %s\n" % seg)

        # Put back whichever molecule was top before
        if old_top != -1:
            molecule.set_top(old_top)

        return extpatches
Esempio n. 16
0
    def _orient_solute(self, molid):
        """
        Orients the solute. Can either move it explicitly in the z direction
        and/or rotate it, or align to an OPM structure. When none of these
        is requested, the system is centered instead.

        All settings are read from self.opts rather than passed in:
          opm_pdb (str): Filename of OPM structure to align to
          opm_align (str): Atom selection string to align
          z_move (float): Amount to move in the Z direction
          z_rotation (float): Amount to rotate membrane relative to protein,
            can just take this straight from the OPM website value
          tmp_dir (str): Directory to put temporary files in

        Args:
          molid (int): VMD molecule ID to orient

        Returns:
          (int) VMD molecule ID of oriented system. This is a newly loaded
            molecule when the system was centered, otherwise molid itself.

        Raises:
          DabbleError if movement and alignment arguments are both specified
        """
        import os  # only needed to release the mkstemp file descriptor

        z_move = self.opts.get('z_move')
        z_rotation = self.opts.get('z_rotation')

        # Check that OPM and alignment aren't both specified. Truthiness
        # is used so that an option that is simply absent (None) does not
        # count as a requested movement.
        if self.opts.get('opm_pdb') and (z_move or z_rotation):
            raise DabbleError("ERROR: Cannot specify an OPM pdb and "
                              "manual orientation information")

        if self.opts.get('opm_pdb'):
            opm = molecule.load('pdb', self.opts['opm_pdb'])
            moveby = atomsel('protein and backbone', molid=molid).fit(
                atomsel(self.opts.get('opm_align'), molid=opm))
            atomsel('all', molid=molid).move(moveby)
            molecule.delete(opm)
            return molid

        if z_move:
            atomsel('all', molid=molid).moveby((0, 0, self.opts['z_move']))
            if not z_rotation:
                return molid

        if z_rotation:
            trans.resetview(molid)  # View affect rotation matrix, now it's I
            # This is negative because we want membrane flat along the z-axis,
            # and OPM lists the membrane rotation relative to the protein
            theta = math.radians(-1 * self.opts['z_rotation'])
            # Rotation matrix in row order with 4th dimension just from I
            # pylint: disable=bad-whitespace, bad-continuation
            rotmat = [
                math.cos(theta), -1 * math.sin(theta), 0, 0,
                math.sin(theta), math.cos(theta), 0, 0,
                0, 0, 1, 0,
                0, 0, 0, 1
            ]
            # pylint: enable=bad-whitespace, bad-continuation
            trans.set_rotation(molid, rotmat)
            return molid

        # Center the system according to VMD's internal metric, then
        # move the protein in the xy plane so that there is equal padding
        # on either side
        molid = molutils.center_system(molid=molid,
                                       tmp_dir=self.opts.get('tmp_dir'),
                                       center_z=self.water_only)
        system = atomsel('all', molid=molid)
        tx = (-max(system.get('x')) - min(system.get('x'))) / 2.
        ty = (-max(system.get('y')) - min(system.get('y'))) / 2.

        # mkstemp hands back an open descriptor along with the path; only
        # the path is wanted, so close the descriptor instead of leaking it
        handle, temp_mae = tempfile.mkstemp(suffix='.mae',
                                            prefix='dabble_centered',
                                            dir=self.opts.get('tmp_dir'))
        os.close(handle)

        system.moveby((tx, ty, 0))
        system.write('mae', temp_mae)
        molecule.delete(molid)
        new_id = molecule.load('mae', temp_mae)
        return new_id
Esempio n. 17
0
    def write(self, prmtop_name):
        """
        Creates a prmtop with either AMBER or CHARMM parameters.

        With a CHARMM forcefield the system is written by a CharmmWriter and
        self._psf_to_charmm_amber is then called. With the AMBER forcefield,
        atom types are assigned, temporary input files are written, tleap is
        run through self._run_leap, and the resulting prmtop is checked.

        Args:
            prmtop_name (str): Prefix of the output files. It is stored in
                self.prmtop_name, and ".prmtop" / ".inpcrd" are appended.

        Raises:
            DabbleError: if self.forcefield contains neither "charmm" nor
                "amber"
        """
        self.prmtop_name = prmtop_name

        # Charmm forcefield
        if "charmm" in self.forcefield:
            psfgen = CharmmWriter(molid=self.molid,
                                  tmp_dir=self.tmp_dir,
                                  lipid_sel=self.lipid_sel,
                                  extra_topos=self.extra_topos,
                                  override_defaults=self.override)
            self.topologies = psfgen.write(self.prmtop_name)
            self._psf_to_charmm_amber()

        # Amber forcefield
        elif "amber" in self.forcefield:

            # Initialize the matcher
            self.matcher = AmberMatcher(self.topologies)
            print("Using the following topologies:")
            for top in self.topologies:
                print("  - %s" % top.split("/")[-1])

            # Store absolute paths. Assigning to the loop variable would
            # leave the lists untouched, so rebuild them instead.
            self.topologies = [os.path.abspath(top)
                               for top in self.topologies]
            self.parameters = [os.path.abspath(par)
                               for par in self.parameters]

            # Assign atom types
            print("Assigning AMBER atom types...")
            conect = self._rename_atoms_amber()

            # Create temporary pdb files that will be leap inputs
            pdbs = []
            pdbs.append(self._write_lipids())
            prot_pdbseqs = self._write_protein()
            pdbs.extend(self._write_solvent())
            ligfiles = self._write_ligands()

            # Now invoke leap to create the prmtop and inpcrd
            outfile = self._run_leap(ligfiles, prot_pdbseqs, pdbs, conect)

            # Repartition hydrogen masses if requested. The repartitioned
            # topology overwrites the prmtop that leap produced.
            if self.hmr:
                print("\nRepartitioning hydrogen masses...")
                parm = AmberParm(prm_name=outfile + ".prmtop",
                                 xyz=outfile + ".inpcrd")
                action = HMassRepartition(parm, "dowater")
                action.execute()
                writer = parmout(action.parm, "%s.prmtop" % self.prmtop_name)
                writer.execute()

            # Check validity of output prmtop using parmed. The files are
            # read back from disk so the check sees what was actually written.
            parm = AmberParm(prm_name=self.prmtop_name + ".prmtop",
                             xyz=self.prmtop_name + ".inpcrd")
            print("\nChecking for problems with the prmtop...")
            print("        Verify all warnings!")
            action = checkValidity(parm)
            action.execute()

        else:
            raise DabbleError("Unhandled forcefield: %s" % self.forcefield)
Esempio n. 18
0
    def get_cell_size(self,
                      mem_buf,
                      wat_buf,
                      molid=None,
                      filename=None,
                      zh_mem_full=_MEMBRANE_FULL_THICKNESS / 2.0,
                      zh_mem_hyd=_MEMBRANE_HYDROPHOBIC_THICKNESS / 2.0):
        """
        Gets the cell size of the final system given initial system and
        buffers. Membrane handling is skipped when self.water_only is set.
        Sets self.size, self._zmax and self._zmin as a side effect.

        Args:
          mem_buf (float) : Membrane (xy) buffer amount
          wat_buf (float) : Water (z) buffer amount
          molid (int) : VMD molecule ID to consider (can't use with filename).
            The top molecule is used if neither molid nor filename is given.
          filename (str) : Filename of system to consider (can't use w molid).
            It is read as a mae file and deleted again before returning.
          zh_mem_full (float) : Half of the full membrane thickness. The
            membrane is taken to span -zh_mem_full to +zh_mem_full in z.
          zh_mem_hyd (float) : Half of the membrane hydrophobic thickness,
            used the same way to pick the transmembrane part of the solute

        Returns:
          (float tuple): x solute dimension, y solute dimension,
            TM x solute dimension, TM y solute dimension, solute z dimension

        Raises:
          ValueError: if filename and molid are both specified
          DabbleError: if the user-specified z dimension cannot hold the
            membrane
        """

        # Sanity check
        if filename is not None and molid is not None:
            raise ValueError("Specified molid and filename to get_cell_size")

        top = -1
        if filename is not None:
            top = molecule.get_top()
            molid = molecule.read(-1, 'mae', filename)
        elif molid is None:
            molid = molecule.get_top()

        try:
            solute_z = atomsel(self.solute_sel, molid=molid).get('z')

            # Some options different for water-only systems (no lipid)
            if self.water_only:
                dx_tm = 0.0
                dy_tm = 0.0
                sol_solute = atomsel(self.solute_sel, molid)
            else:
                tm_solute = atomsel(
                    '(%s) and z > %f and z < %f' %
                    (self.solute_sel, -zh_mem_hyd, zh_mem_hyd), molid)
                if len(tm_solute):
                    tm_x = tm_solute.get('x')
                    tm_y = tm_solute.get('y')
                    dx_tm = max(tm_x) - min(tm_x)
                    dy_tm = max(tm_y) - min(tm_y)
                else:
                    dx_tm = dy_tm = 0

                sol_solute = atomsel(
                    '(%s) and (z < %f or z > %f)' %
                    (self.solute_sel, -zh_mem_hyd, zh_mem_hyd), molid)

            # Solvent invariant options. The solute may lie entirely inside
            # the hydrophobic slab, leaving nothing in the solvent region.
            if len(sol_solute):
                sol_x = sol_solute.get('x')
                sol_y = sol_solute.get('y')
                dx_sol = max(sol_x) - min(sol_x)
                dy_sol = max(sol_y) - min(sol_y)
            else:
                dx_sol = dy_sol = 0.0

            if self.opts.get('user_x'):
                self.size[0] = self.opts['user_x']
            else:
                self.size[0] = max(dx_tm + 2. * mem_buf,
                                   dx_sol + 2. * wat_buf)
            if self.opts.get('user_y'):
                self.size[1] = self.opts['user_y']
            else:
                self.size[1] = max(dy_tm + 2. * mem_buf,
                                   dy_sol + 2. * wat_buf)

            # Z dimension. If there's a membrane, need to account for
            # asymmetry in the Z dimension where the protein could be uneven
            # in the membrane or even peripheral
            z_hi = max(solute_z)
            z_lo = min(solute_z)
            if self.opts.get('user_z'):
                self.size[2] = self.opts['user_z']
                # Split the leftover space evenly above and below the solute
                buf = (self.opts['user_z'] - z_hi + z_lo) / 2
                self._zmax = z_hi + buf
                self._zmin = z_lo - buf
                if zh_mem_full > self._zmax or -zh_mem_full < self._zmin:
                    raise DabbleError("Specified user z of %f is too small to "
                                      "accomodate protein and membrane!" %
                                      self.opts['user_z'])
            else:
                if self.water_only:
                    self._zmax = z_hi + wat_buf
                    self._zmin = z_lo - wat_buf
                else:
                    self._zmax = max(z_hi + wat_buf, zh_mem_full)
                    self._zmin = min(z_lo - wat_buf, -zh_mem_full)
                self.size[2] = self._zmax - self._zmin

            return dx_sol, dy_sol, dx_tm, dy_tm, z_hi - z_lo
        finally:
            # Cleanup temporary molecule, if read in. Done in a finally so an
            # error above does not leave it loaded.
            if filename is not None:
                molecule.delete(molid)
                if top != -1:
                    molecule.set_top(top)
Esempio n. 19
0
    def _write_lipids(self):
        """
        Writes all lipids to a temporary PDB file, split into the modular
        tail, head, tail residues that Lipid14 specifies. Does name matching
        for lipids and writes a TER card after each lipid.

        Residues in the lipid selection that are not lipid heads but match a
        normal residue definition (cholesterol etc.) are renamed and given a
        new resid, but are not written to this file.

        Returns:
            (str): File name of PDB file written, or None if the lipid
                selection is empty

        Raises:
            DabbleError: if a residue is neither a lipid nor a known residue,
                or if the tails of a lipid cannot be identified
        """
        # Local import: only needed to wrap the descriptor mkstemp returns
        import os

        # Select in self.molid explicitly, since it only becomes the top
        # molecule further down
        lipid_res = set(
            atomsel(self.lipid_sel, molid=self.molid).get('residue'))
        n_lips = len(lipid_res)
        if not n_lips:
            return None

        molecule.set_top(self.molid)
        handle, temp = tempfile.mkstemp(suffix='.pdb',
                                        prefix='amber_lipids_',
                                        dir=self.tmp_dir)

        def select_indices(indices):
            """Returns an atom selection of the given atom indices."""
            return atomsel('index %s' % ' '.join(str(x) for x in indices))

        # Wrapping the descriptor closes both it and the file, even on error
        with os.fdopen(handle, 'w') as fileh:
            resid = 1
            idx = 1
            while lipid_res:
                residue = lipid_res.pop()
                sys.stdout.write(
                    "Writing lipids.... %.0f%%  \r" %
                    (100. - 100. * len(lipid_res) / float(n_lips)))
                sys.stdout.flush()

                sel = atomsel('residue %s' % residue)
                headres, headnam, minusidx = self.matcher.get_lipid_head(sel)

                # If it's not a lipid head, check if it's a normal residue
                # in case cholesterol etc is in the selection
                if not headres:
                    resnames, atomnames = self.matcher.get_names(
                        sel, print_warning=False)
                    if not resnames:
                        raise DabbleError(
                            "Residue %s:%s not a valid lipid" %
                            (sel.get('resname')[0], sel.get('resid')[0]))
                    self._apply_naming_dictionary(resnames, atomnames)
                    sel.set('resid', resid)
                    resid += 1
                    continue

                # Apply the name to the heads
                self._apply_naming_dictionary(headres, headnam)

                # Pull out the tail resnames and indices
                taildicts = self.matcher.get_lipid_tails(sel, headnam.keys())
                for (resnames, atomnames) in taildicts:
                    self._apply_naming_dictionary(resnames, atomnames)

                # The first tail is the one containing the atom index that
                # get_lipid_head reported as being in the minus tail
                firstdict = [_ for _ in taildicts if minusidx in _[0].keys()]
                if len(firstdict) != 1:
                    raise DabbleError(
                        "Error finding tails for lipid %s:%s" %
                        (sel.get('resname')[0], sel.get('resid')[0]))
                firstdict = firstdict[0]
                taildicts.remove(firstdict)
                if not taildicts:
                    # No second tail: fail clearly before writing anything
                    raise DabbleError(
                        "Error finding tails for lipid %s:%s" %
                        (sel.get('resname')[0], sel.get('resid')[0]))

                # Renumber the first tail, head, then second tail and write
                # them separately. Needs to be done this way to guarantee
                # order.
                pieces = (firstdict[0].keys(),
                          headnam.keys(),
                          taildicts[0][0].keys())
                for offset, indices in enumerate(pieces):
                    lsel = select_indices(indices)
                    lsel.set('resid', resid + offset)
                    lsel.set('user', 0.0)
                    idx = self._write_residue(lsel, fileh, idx)
                resid += 3
                fileh.write("TER\n")  # TER card between lipid residues

            fileh.write("END\n")

        sys.stdout.write("\n")
        return temp
Esempio n. 20
0
    def __init__(self, molid, **kwargs):
        """
        Creates an AMBER Writer.

        Args:
            molid (int): VMD molecule ID of system to write
            tmp_dir (str): Directory for temporary files. Defaults to "."
            forcefield (str): charmm36m, charmm36, charmm, or amber.
                Defaults to "charmm36m". "charmm" uses the same parameter
                files as "charmm36".
            lipid_sel (str): Lipid selection string. Defaults to "lipid"
            hmr (bool): If hydrogen masses should be repartitioned. Defaults
                to False.
            extra_topos (list of str): Additional topology (.str, .off, .lib) to
                include.
            extra_params (list of str): Additional parameter sets (.str, .frcmod)
            override_defaults (bool): If set, the default topologies and
                parameters are dropped and only the extra ones are used.
            debug_verbose (bool): Prints additional output, like from tleap.

        Raises:
            DabbleError: if the forcefield is unsupported
            DabbleError: if the AMBER forcefield is requested but AMBERHOME
                is unset, tleap is missing, or the expected leaprc files
                are not present
        """

        self.molid = molid
        self.prmtop_name = ""

        self.tmp_dir = kwargs.get("tmp_dir", ".")
        self.lipid_sel = kwargs.get("lipid_sel", "lipid")
        self.hmr = kwargs.get("hmr", False)
        self.extra_topos = kwargs.get("extra_topos", None)
        self.override = kwargs.get("override_defaults", False)
        self.debug_verbose = kwargs.get("debug_verbose", False)

        # Always define the matcher attribute, whatever the forcefield
        self.matcher = None

        forcefield = kwargs.get("forcefield", "charmm36m")
        if forcefield not in ["amber", "charmm36m", "charmm", "charmm36"]:
            raise DabbleError("Unsupported forcefield: %s" % forcefield)
        self.forcefield = forcefield

        if self.forcefield == "amber":
            amberhome = os.environ.get("AMBERHOME")
            if not amberhome:
                raise DabbleError(
                    "AMBERHOME must be set to use AMBER forcefield!")
            if not os.path.isfile(os.path.join(amberhome, "bin", "tleap")):
                raise DabbleError("tleap is not present in $AMBERHOME/bin!")

            # These leaprc files must all exist; a missing one is reported
            # as an AMBER installation that is too old
            leap_cmd = os.path.join(amberhome, "dat", "leap", "cmd")
            self.topologies = [
                os.path.join(leap_cmd, top) for top in (
                    "leaprc.protein.ff14SB",
                    "leaprc.lipid14",
                    "leaprc.water.tip3p",
                    "leaprc.gaff",
                )
            ]
            for top in self.topologies:
                if not os.path.isfile(top):
                    raise DabbleError("AMBER version too old! "
                                      "Dabble requires >= AmberTools16!")

            self.parameters = []
        else:
            # The CHARMM variants differ only in the protein parameter file
            if self.forcefield == "charmm36m":
                protein_prm = "par_all36m_prot.prm"
            else:
                protein_prm = "par_all36_prot.prm"
            self.parameters = [
                resource_filename(__name__, "charmm_parameters/" + par)
                for par in (
                    "toppar_water_ions.str",
                    "par_all36_cgenff.prm",
                    protein_prm,
                    "par_all36_lipid.prm",
                    "par_all36_carb.prm",
                    "par_all36_na.prm",
                    "toppar_all36_prot_na_combined.str",
                )
            ]
            self.topologies = []

        if self.override:
            self.topologies = []
            self.parameters = []

        if self.extra_topos is not None:
            self.topologies.extend(self.extra_topos)

        if kwargs.get("extra_params") is not None:
            self.parameters.extend(kwargs.get("extra_params"))

        self.prompt_params = False
Esempio n. 21
0
    def _run_leap(self, ligfiles, prot_pdbseqs, pdbs, conect):
        """
        Runs leap, creating a prmtop and inpcrd from the given pdb and off
        library files. The output files are named after self.prmtop_name.

        Args:
            ligfiles (sequence of pairs): One entry per ligand. Element 0 is
                the pdb or mol2 file name; element 1 is the sequence of UNIT
                names, which is only used for pdb files.
            prot_pdbseqs (sequence of pairs): For each protein fragment, the
                PDB file containing it and the sequence of UNITs for it
            pdbs (list of str): PDB or Mol2 files to combine. None entries
                are ignored.
            conect (set of int): Atom indices connected by an extraresidue
                bond. The passed set is not modified.

        Returns:
            (str) Prefix of file written

        Raises:
            DabbleError if AMBERHOME is unset
            DabbleError if a topology, coordinate or ligand file type
                cannot be determined
            DabbleError if tleap fails or the protein is incomplete in
                the resulting prmtop
            ValueError if an atom in conect does not have exactly one bonded
                partner that is also in conect
        """
        # Ensure leap is actually available
        amberhome = os.environ.get("AMBERHOME")
        if not amberhome:
            raise DabbleError("AMBERHOME must be set to use leap!")

        libraries = self.topologies + self.parameters
        pdbs = [_ for _ in pdbs if _ is not None]
        # Work on a copy so the caller's set survives the pairing below
        remaining = set(conect) if conect else set()

        # Create the leap input file. mkstemp returns an open descriptor, so
        # wrap it rather than reopening the path and leaking the descriptor.
        handle, leapin = tempfile.mkstemp(suffix='.in',
                                          prefix='dabble_leap_',
                                          dir=self.tmp_dir)
        with os.fdopen(handle, 'w') as fileh:
            for i in libraries:
                if "leaprc" in i:
                    fileh.write("source %s\n" % i)
                elif "frcmod" in i:
                    fileh.write("loadamberparams %s\n" % i)
                elif ".lib" in i:
                    fileh.write("loadoff %s\n" % i)
                elif ".off" not in i:
                    raise DabbleError("Unknown topology type: %s" % i)
            fileh.write('\n')

            # Add off files here, after everything else has been loaded
            for i in libraries:
                if ".off" in i:
                    fileh.write("loadoff %s\n" % i)

            for i, pdb in enumerate(pdbs):
                if "pdb" in pdb:
                    fileh.write("p%s = loadpdb %s\n" % (i, pdb))
                elif "mol2" in pdb:
                    fileh.write("p%s = loadmol2 %s\n" % (i, pdb))
                else:
                    raise DabbleError("Unknown coordinate type: %s" % pdb)

            for i, f in enumerate(ligfiles):
                if "pdb" in f[0]:
                    fileh.write("l%s = loadpdbusingseq %s {%s}\n" %
                                (i, f[0], f[1]))
                elif "mol2" in f[0]:
                    fileh.write("l%s = loadmol2 %s\n" % (i, f[0]))
                else:
                    raise DabbleError("Unknown ligand file type: %s" % f[0])

            for i, pp in enumerate(prot_pdbseqs):
                fileh.write("pp%d = loadpdbusingseq %s { %s} \n" %
                            (i, pp[0], pp[1]))

            # Need to combine before creating bond lines since can't create
            # bonds between UNITs
            fileh.write(
                "p = combine { %s }\n" %
                ' '.join(["pp%d" % i for i in range(len(prot_pdbseqs))]))

            # Create bond lines
            while remaining:
                # Pull out two atoms bound to each other
                idx = remaining.pop()
                s1 = atomsel("index %d" % idx)
                other = [s for s in s1.bonds[0] if s in remaining]
                if len(other) != 1:
                    raise ValueError("Problem with bonds to index %d" % idx)
                other = other[0]
                s2 = atomsel("index %d" % other)
                remaining.remove(other)

                fileh.write("bond p.{0}.{1} p.{2}.{3}\n".format(
                    s1.get('resid')[0],
                    s1.get('name')[0],
                    s2.get('resid')[0],
                    s2.get('name')[0]))

            if pdbs:
                fileh.write("\np = combine { p %s }\n" %
                            ' '.join(["p%d" % i for i in range(len(pdbs))]))
            if len(ligfiles):
                fileh.write("p = combine { p %s }\n" %
                            ' '.join(["l%d" % i
                                      for i in range(len(ligfiles))]))
            fileh.write("setbox p centers 0.0\n")
            fileh.write("saveamberparm p %s.prmtop %s.inpcrd\n" %
                        (self.prmtop_name, self.prmtop_name))
            fileh.write("quit\n")

        # Now invoke leap. If it fails, print output
        failure = "Call to tleap failed! See above output for errors"
        try:
            out = check_output([
                os.path.join(amberhome, "bin", "tleap"), "-f", leapin
            ]).decode("utf-8")
        except Exception as err:  # pylint: disable=broad-except
            # A failed process carries what it printed on the exception.
            # Show that, since there is nothing else to point the user at.
            captured = getattr(err, "output", None)
            if captured:
                if isinstance(captured, bytes):
                    captured = captured.decode("utf-8", "replace")
                print(captured)
            raise DabbleError(failure)

        out = "%s%s%s" % (
            "\n================BEGIN TLEAP OUTPUT================\n", out,
            "\n=================END TLEAP OUTPUT=================\n")

        # tleap can exit normally without having saved anything
        saved = "not saved" not in out
        if self.debug_verbose or not saved:
            print(out)
        if not saved:
            raise DabbleError(failure)

        # Do a quick sanity check that all the protein is present.
        # NOTE(review): the molecule loaded here is never deleted, and VMD
        # presumably makes it the top molecule -- confirm this is intended.
        mademol = molecule.load("parm7", "%s.prmtop" % self.prmtop_name,
                                "rst7", "%s.inpcrd" % self.prmtop_name)
        acid_sel = "resname %s" % " ".join(self.matcher._acids)
        if len(atomsel(acid_sel, mademol)) != \
                len(atomsel(acid_sel, self.molid)):
            print(out)
            raise DabbleError(
                "Not all protein was present in the output prmtop."
                " This indicates a problem with tleap. Check the "
                "above output, especially for covalent ligands. "
                "Is naming consistent in all .off files?")

        return self.prmtop_name
Esempio n. 22
0
    def _find_single_residue_names(self, resname, molid):
        """
        Uses graph matcher and available topologies to match up
        ligand names automatically. Each residue is first matched against
        the known residue definitions, and if that fails against the
        available patches. Matched atoms and residues are renamed in place.

        Only atoms with the user field set to 1.0 are considered.

        Args:
          resname (str): Residue name of the ligand that will be written.
            All ligands will be checked separately against the graphs.
          molid (int): VMD molecule ID to consider

        Returns:
          (list of ints): Residue numbers (not resid) of all input ligands
            that were successfully matched, over all chains. Need to do it
            this way since residue names can be changed in here to
            different things.

        Raises:
          DabbleError if number of resids does not match number of residues
            as interpreted by VMD
          NotImplementedError if a residue could not be matched to a graph.
        """
        # Put our molecule on top, and put the old one back even on error
        old_top = molecule.get_top()
        molecule.set_top(molid)
        try:
            base_sel = "user 1.0 and resname '%s'" % resname

            # Sanity check that there is no discrepancy between defined
            # resids and residues as interpreted by VMD. Residues are
            # collected over every chain, not just the last one visited.
            found = set()
            for chain in set(atomsel(base_sel).get('chain')):
                chain_sel = atomsel("%s and chain %s" % (base_sel, chain))
                chain_residues = set(chain_sel.get('residue'))
                chain_resids = set(chain_sel.get('resid'))
                if len(chain_residues) != len(chain_resids):
                    raise DabbleError("VMD found %d residues for resname "
                                      "'%s', but there are %d resids! "
                                      "Check input."
                                      % (len(chain_residues), resname,
                                         len(chain_resids)))
                found.update(chain_residues)
            residues = sorted(found)

            for residue in residues:
                sel = atomsel("residue %s and resname '%s' and user 1.0"
                              % (residue, resname))
                (newname, atomnames) = self.matcher.get_names(
                    sel, print_warning=True)
                if not newname:
                    # No plain residue matches, so try patched residues.
                    # The result goes into newname so success is detected.
                    (newname, patch, atomnames) = \
                        self.matcher.get_patches(sel)
                    if not newname:
                        print("ERROR: Could not find a residue definition "
                              "for %s:%s" % (resname, residue))
                        raise NotImplementedError(
                            "No residue definition for %s:%s"
                            % (resname, residue))
                    print("\tApplying patch %s to ligand %s"
                          % (patch, newname))

                # Do the renaming. Names containing + or - are skipped.
                for idx, name in atomnames.items():
                    atom = atomsel('index %s' % idx)
                    if atom.get('name')[0] != name and "+" not in name and \
                       "-" not in name:
                        print("Renaming %s:%s: %s -> %s" % (resname, residue,
                                                            atom.get('name')[0],
                                                            name))
                        atom.set('name', name)
                sel.set('resname', newname)
        finally:
            molecule.set_top(old_top)

        return residues
Esempio n. 23
0
    def _parse_topology(self, filename):
        """
        Parses an amber topology file. Library files (.off, .lib) are handed
        to self._load_off and frcmod files to self._load_params. Anything
        else must be a leaprc file, which is read line by line for these
        commands: "addAtomTypes" (atom type definitions), "loadOff"
        (topology libraries), "loadAmberParams" (frcmod files), and
        "source" (other leaprc files, parsed recursively).

        Args:
            filename (str): The file to parse

        Returns:
            True if successful, or whatever self._load_params returns when
            filename is a frcmod file

        Raises:
            DabbleError if topology file is malformed in various ways
            DabbleError if the file type is not recognized
            DabbleError if AMBERHOME is unset when parsing a leaprc
        """
        if ".off" in filename or ".lib" in filename:
            # A library is handled completely by _load_off, so stop here
            # instead of also reading it as if it were a leaprc
            self._load_off(filename)
            return True
        if "frcmod" in filename:
            return self._load_params(filename)
        if "leaprc" not in filename:
            raise DabbleError(
                "AmberMatcher only parses leaprc or frcmod topologies!"
                "Can't read topology '%s'" % filename)

        # Set AMBER search path for lib files
        if not os.environ.get("AMBERHOME"):
            raise DabbleError("AMBERHOME is unset!")
        leapdir = os.path.join(os.environ["AMBERHOME"], "dat", "leap")

        def locate(name, subdir):
            """Prefers a file in the current directory, then the amber one."""
            if os.path.isfile(name):
                return name
            return os.path.join(leapdir, subdir, name)

        # Only addAtomTypes spans several lines, so this is the sole state
        in_atom_types = False
        with open(filename, 'r') as fileh:
            for line in fileh:
                # Drop comments
                if "#" in line:
                    line = line[:line.index("#")]
                tokens = [i.strip(" \t'\"\n") for i in line.split()]
                if not tokens:
                    continue

                if in_atom_types:
                    # Line should look like: { "OG" "O" "sp3" }
                    # we need the first 2 things for atom name and element
                    if tokens[0] == "}":  # done with atom type definition
                        in_atom_types = False
                        continue
                    if tokens[0] != "{" or tokens[-1] != "}" \
                            or len(tokens) < 4:
                        raise DabbleError("Malformed line in %s: %s" %
                                          (filename, line))
                    if not tokens[2]:
                        logger.warning("Ignoring pseudoatom %s", tokens[1])
                        continue

                    if tokens[2] not in self.MASS_LOOKUP.values() and \
                       tokens[2] not in self.LEAP_ELEMENTS.values():
                        raise DabbleError("Unknown element in %s\n: %s" %
                                          (filename, tokens[2]))
                    self.nodenames[tokens[1]] = tokens[2]
                    continue

                command = tokens[0].lower()

                # addAtomTypes adds more atoms
                if command == "addatomtypes":
                    in_atom_types = True
                    continue

                if command not in ("loadoff", "loadamberparams", "source"):
                    continue  # all other leaprc commands are ignored

                # Each of these commands takes a file name argument
                if len(tokens) < 2:
                    raise DabbleError("Malformed line in %s: %s" %
                                      (filename, line))

                if command == "loadoff":
                    # loadOff loads a topology library
                    self._load_off(locate(tokens[1], "lib"))
                elif command == "loadamberparams":
                    # loadAmberParams loads a frcmod file, which may
                    # define ions
                    self._load_params(locate(tokens[1], "parm"))
                else:
                    # can source other leaprc files within this one
                    self._parse_topology(locate(tokens[1], "cmd"))

        return True
Esempio n. 24
0
    def get_disulfide(self, selstring, fragment, molid):  #pylint: disable=too-many-locals
        """
        Decides whether a selection is a cysteine taking part in a disulfide
        bond. If so, builds the matching DISU patch line and an atom name
        translation from a subgraph match against the known residues.

        Args:
            selstring (str): Selection to check
            fragment (str): Fragment ID. Not used by this implementation.
            molid (int): VMD molecule of entire system (needed for disu partner)

        Returns:
            (str, str, dict) resname matched, patch line to put directly
              into psfgen, name translation dictionary. All three are
              None if the selection is not a disulfide-bonded cysteine.

        Raises:
            DabbleError: if none of the atoms bonded from outside the
              residue is a sulfur
        """
        no_match = (None, None, None)
        selection = atomsel(selstring, molid=molid)

        # A disulfide cysteine has exactly 3 atoms joining it to the outside
        rgraph, _ = self.parse_vmd_graph(selection)
        externs = self.get_extraresidue_atoms(selection)
        if len(externs) != 3:
            return no_match

        # Keep only this residue's own atoms for the comparison
        own_atoms = nx.Graph(rgraph)
        foreign = [node for node in rgraph.nodes()
                   if rgraph.node[node]["residue"] != "self"]
        own_atoms.remove_nodes_from(foreign)

        # Collect every amino acid that this residue is a subgraph of
        candidates = {}
        for acid in self._acids:
            template = self.known_res.get(acid)
            if not template:
                continue

            gmatch = isomorphism.GraphMatcher(
                template, own_atoms,
                node_match=super(CharmmMatcher, self)._check_atom_match)
            if gmatch.subgraph_is_isomorphic():
                candidates[acid] = gmatch.match()

        if not candidates:
            return no_match

        # Take the candidate with the largest definition; must be cysteine
        matchname = max(candidates.keys(),
                        key=(lambda x: len(self.known_res[x])))
        if matchname != "CYS":
            return no_match

        # The match maps name->idx because of the subgraph direction, so
        # flip it to idx->name
        forward = next(candidates[matchname])
        atomnames = dict((idx, name) for (name, idx) in forward.items())

        # The partner cysteine is reached through the externally bonded
        # sulfur
        partners = []
        for n in externs:
            element = atomsel("index %d" % n, molid=molid).get("element")[0]
            if element == "S":
                partners.append(n)
        if not partners:
            raise DabbleError("3 bonded Cys %d isn't a valid disulfide!" %
                              selection.get('resid')[0])
        osel = atomsel("index %d" % partners[0], molid=molid)

        # Order by fragment, then by resid, so same DISU isn't listed twice.
        # On a complete tie this residue goes first.
        partner_key = (osel.get("fragment")[0], osel.get("resid")[0])
        own_key = (selection.get("fragment")[0], selection.get("resid")[0])
        if partner_key < own_key:
            first_key, second_key = partner_key, own_key
        else:
            first_key, second_key = own_key, partner_key

        patchline = "patch DISU P%d:%d P%d:%d\n" % (first_key + second_key)

        return (matchname, patchline, atomnames)
Esempio n. 25
0
    def _load_off(self, filename):
        """
        Parses an off format amber library file. Puts the resulting
        residue definitions into the known_res dictionary.

        Each "!entry.<unit>.unit.<section>" header selects the unit graph
        (created in known_res if absent) and the section being read:

          - atoms: one node per line, keyed by the 1-based line number
            within the section as a string
          - connectivity: one edge per line between two atom nodes
          - connect: bonds to the previous ("-", first line) and next
            ("+", later lines) residue. A value of 0 means no bond.
          - residues: sets the residue name on the atoms of that residue
            and marks them as belonging to "self"

        All other sections are skipped. Elements of atom types that are not
        yet in nodenames are taken from the element index column and
        recorded in nodenames under the atom type.

        Args:
            filename (str): The file to parse

        Returns:
            True if successful

        Raises:
            DabbleError if off file is malformed in various ways
        """
        # Section name following "!entry.<unit>.unit." -> parser state
        sections = {
            "atoms": "addatoms",
            "connectivity": "addbonds",
            "connect": "addextrabonds",
            "residues": "name",
        }
        unit = ""
        incmd = ""
        cmdidx = 1
        graph = None

        with open(filename, 'r') as fileh:
            for line in fileh:
                tokens = [i.strip(" \t\"\n") for i in line.split()]
                if not tokens or not tokens[0]:
                    continue

                # If we find a command, pull out the unit name then figure
                # out what section is being defined. Lines starting with
                # "!!" are not commands.
                if tokens[0].startswith("!") \
                        and not tokens[0].startswith("!!"):
                    fields = tokens[0].split('.')
                    if len(fields) < 2:
                        raise DabbleError(
                            "Can't parse section header in file %s\n"
                            "Line was: %s" % (filename, line))
                    unit = fields[1]

                    prefix = "!entry.%s.unit." % unit
                    if tokens[0].startswith(prefix):
                        incmd = sections.get(tokens[0][len(prefix):], "skip")
                    else:
                        incmd = "skip"

                    if not self.known_res.get(unit):
                        self.known_res[unit] = nx.Graph()

                    graph = self.known_res[unit]
                    cmdidx = 1
                    continue

                # Add atoms command
                if incmd == "addatoms":
                    # Define atom types if not present using element index.
                    # The entry is keyed by the atom type (tokens[1]), the
                    # same key used for the lookup, not by the atom name.
                    element = self.nodenames.get(tokens[1])
                    if not element:
                        element = self.LEAP_ELEMENTS.get(
                            int(tokens[6]), "Other")
                        self.nodenames[tokens[1]] = element

                    graph.add_node(
                        str(cmdidx),
                        type=tokens[1],
                        element=element,
                        resname=tokens[3],
                        residue=tokens[3],  # residue index, will be replaced
                        atomname=tokens[0])

                # Add bonds command
                elif incmd == "addbonds":
                    if not graph.node.get(tokens[0]) \
                            or not graph.node.get(tokens[1]):
                        raise DabbleError(
                            "Can't parse bond for unit %s, file %s\n"
                            "Line was: %s" % (unit, filename, line))
                    graph.add_edge(tokens[0], tokens[1])

                # Add externally bonded atoms command if there are actually
                # atoms, a 0 value here indicates no value. The - is listed before
                # the + so cmdidx is used to keep track of which one we're on
                elif incmd == "addextrabonds" and tokens[0] != "0":
                    # Validate before touching the graph so a bad line does
                    # not leave a dangling join node behind
                    if not graph.node.get(tokens[0]):
                        raise DabbleError("Can't parse extra residue bond for "
                                          "unit %s, file %s\nLine was: %s" %
                                          (unit, filename, line))
                    node1 = "-" if cmdidx == 1 else "+"
                    graph.add_node(node1,
                                   atomname=node1,
                                   type="",
                                   residue=node1,
                                   element="_join")
                    graph.add_edge(node1, tokens[0])

                elif incmd == "name":
                    # Atoms still carry the residue index from the atoms
                    # section; pick the ones this line refers to
                    members = [n for n in graph.nodes()
                               if graph.node[n].get("residue") == tokens[1]]

                    # Sanity check residue name here
                    if members and "*" in tokens[0]:
                        raise DabbleError(
                            "You have a common error in your "
                            ".off file '%s'.\n The residue name "
                            "is invalid. Please check the first "
                            "field in the unit.residue section." %
                            filename)

                    for nod in members:
                        graph.node[nod]["resname"] = tokens[0]
                        graph.node[nod]["residue"] = "self"

                cmdidx += 1

        return True
Esempio n. 26
0
    def _rtf_to_graph(self, data, resname, patch=None):  #pylint: disable=too-many-branches
        """
        Parses rtf text to a graph representation. If a graph to patch
        is provided, then patches that graph with this rtf data

        Handled keywords (case-insensitive): ATOM, BOND, DOUBLE, CMAP,
        MASS and DELETE/DELE. Lines starting with anything else, and blank
        lines, are ignored.

        Args:
            data (str): The rtf data for this residue or patch
            resname (str): Residue name, from earlier parsing
            patch (networkx graph): The graph to apply patches to,
              or None if just parsing a residue. Will not be modified.

        Returns:
            (networkx graph): Graph representation of molecule, or None
              if it could not be converted (invalid patch)

        Raises:
            DabbleError if a BOND/DOUBLE line has an odd number of atoms
              or a CMAP line does not list exactly 8 atoms
            ValueError if a DELETE is found while not patching
        """

        def strip_index(name):
            """Drops a single leading digit from an atom name, if any"""
            return name[1:] if name[0].isdigit() else name

        # Work on a new graph initialized from the patch target, if any
        graph = nx.Graph(data=patch)
        is_patch = bool(patch)
        firstcmap = True

        for line in data.splitlines():
            tokens = [i.strip().upper() for i in line.split()]

            # Blank or whitespace-only lines carry no keyword
            if not tokens:
                continue

            # Atoms mean add node to current residue
            if tokens[0] == "ATOM":
                # Patches can change atom type
                # Technically re-adding the node will just change the type and
                # not add a duplicate, but this is more correct and clear.
                if tokens[1] in graph:
                    graph.node[tokens[1]]["type"] = tokens[2]
                else:
                    graph.add_node(tokens[1],
                                   type=tokens[2],
                                   residue="self",
                                   patched=is_patch)

            # Bond or double means add edge to residue graph
            elif tokens[0] == "BOND" or tokens[0] == "DOUBLE":
                # Keyword plus an even number of atom names -> odd length
                if len(tokens) % 2 == 0:
                    raise DabbleError("Unequal number of atoms in bond terms\n"
                                      "Line was:\n%s" % line)
                for txn in range(1, len(tokens), 2):
                    if not _define_bond(graph, tokens[txn], tokens[txn + 1],
                                        is_patch):
                        return None

            # CMAP terms add edges. This makes amino acids work since the
            # next and previous amino acids aren't defined as bonds usually
            elif tokens[0] == "CMAP":

                if len(tokens) == 1:  # CMAP parameter section follows, ignore
                    continue

                if firstcmap:
                    # Remove all +- join nodes on patching
                    joins = [
                        n for n in graph.nodes()
                        if graph.node[n]["residue"] != "self"
                    ]
                    graph.remove_nodes_from(joins)
                    firstcmap = False

                if len(tokens) != 9:  # CMAP requires 2 dihedrals
                    raise DabbleError("Incorrect CMAP line\n"
                                      "Line was:\n%s" % line)

                # Two dihedrals of 4 atoms each; bond the 3 consecutive
                # pairs inside each dihedral, first dihedral first
                atoms = tokens[1:]
                for start in (0, 4):
                    for offset in range(3):
                        node1 = atoms[start + offset]
                        node2 = atoms[start + offset + 1]
                        if not _define_bond(graph, node1, node2, is_patch):
                            return None

            # Check for atom definitions
            elif tokens[0] == "MASS":
                if self.nodenames.get(tokens[2]):
                    logger.info("Skipping duplicate type %s", tokens[2])
                else:
                    self.nodenames[tokens[2]] = \
                            MoleculeMatcher.get_element(float(tokens[3]))

            # Patches can delete atoms
            elif tokens[0] == "DELETE" or tokens[0] == "DELE":
                if not is_patch:
                    raise ValueError("DELETE only supported in patches!\n"
                                     "Line was:\n%s" % line)

                # Sometimes delete has a number in front of the atom name
                try:
                    if tokens[1] == "ATOM":
                        graph.remove_node(strip_index(tokens[2]))
                    elif tokens[1] == "BOND":
                        graph.remove_edge(strip_index(tokens[2]),
                                          strip_index(tokens[3]))

                # Atom or bond did not exist, ie this patch is invalid
                except nx.NetworkXError:
                    return None

        # Assign resname to all atoms
        nx.set_node_attributes(graph, name="resname", values=resname)

        # If we didn't patch, set the whole residue to unpatched atom attribute
        if not is_patch:
            nx.set_node_attributes(graph, name="patched", values=False)

        return graph