Beispiel #1
0
    def read_topology_file(self, tfile):
        """
        Reads the atom type definitions (MASS records), residue templates
        (RESI) and patches (PRES) from a CHARMM topology file. Reading the atom
        types from here is unnecessary for versions 36 and later of the CHARMM
        force field.

        Atom types are stored in ``atom_types_str``, ``atom_types_int`` and
        ``atom_types_tuple`` as they are encountered. Residues and patches are
        collected locally and merged into ``self.residues`` and
        ``self.patches`` once the whole input has been read.

        Parameters
        ----------
        tfile : str or iterator
            Name of the CHARMM topology file to read, or an iterator yielding
            the lines of a topology file. An iterator supplied by the caller is
            not closed by this method.

        Raises
        ------
        CharmmError
            If a MASS record does not have enough fields
        """
        conv = CharmmParameterSet._convert
        if isinstance(tfile, str):
            own_handle = True
            f = iter(CharmmFile(tfile))
        else:
            own_handle = False
            f = tfile
        hpatch = tpatch = None  # default Head and Tail patches
        residues = dict()
        patches = dict()
        hpatches = dict()
        tpatches = dict()
        try:
            # next(f, None) returns None at end of input. That terminates
            # whichever loop is running, so an empty file is a no-op and a
            # residue whose definition runs up to the end of the input is still
            # stored instead of being dropped by an escaping StopIteration.
            line = next(f, None)
            while line:
                line = line.strip()
                if line[:4] == "MASS":
                    words = line.split()
                    try:
                        idx = conv(words[1], int, "atom type")
                        name = words[2].upper()
                        mass = conv(words[3], float, "atom mass")
                    except IndexError:
                        raise CharmmError("Could not parse MASS section of %s" % tfile)
                    # The parameter file might or might not have an element name
                    try:
                        elem = words[4].upper()
                        if len(elem) == 2:
                            elem = elem[0] + elem[1].lower()
                        atomic_number = AtomicNum[elem]
                    except (IndexError, KeyError):
                        # Figure it out from the mass
                        atomic_number = AtomicNum[element_by_mass(mass)]
                    atype = AtomType(name=name, number=idx, mass=mass, atomic_number=atomic_number)
                    self.atom_types_str[atype.name] = atype
                    self.atom_types_int[atype.number] = atype
                    self.atom_types_tuple[(atype.name, atype.number)] = atype
                elif line[:4] == "DECL":
                    pass  # Not really sure what this means
                elif line[:4] == "DEFA":
                    words = line.split()
                    if len(words) < 5:
                        warnings.warn("DEFA line has %d tokens; expected 5" % len(words))
                    else:
                        # Consume the tokens pairwise: (keyword, patch name)
                        it = iter(words[1:5])
                        for tok, val in zip(it, it):
                            if val.upper() == "NONE":
                                val = None
                            if tok.upper().startswith("FIRS"):
                                hpatch = val
                            elif tok.upper() == "LAST":
                                tpatch = val
                            else:
                                warnings.warn("DEFA patch %s unknown" % val)
                elif line[:4].upper() in ("RESI", "PRES"):
                    restype = line[:4].upper()
                    # Get the residue definition
                    words = line.split()
                    resname = words[1].upper()
                    # Assign default patches
                    hpatches[resname] = hpatch
                    tpatches[resname] = tpatch
                    # The total charge is only validated here; per-atom charges
                    # come from the ATOM records below.
                    try:
                        float(words[2])
                    except (IndexError, ValueError):
                        warnings.warn("No charge for %s" % resname)
                    if restype == "RESI":
                        res = ResidueTemplate(resname)
                    else:
                        res = PatchTemplate(resname)
                    line = next(f, None)
                    group = []
                    ictable = []
                    while line:
                        line = line.lstrip()
                        if line[:5].upper() == "GROUP":
                            if group:
                                res.groups.append(group)
                            group = []
                        elif line[:4].upper() == "ATOM":
                            words = line.split()
                            name = words[1].upper()
                            atom_type = words[2].upper()
                            charge = float(words[3])
                            atom = Atom(name=name, type=atom_type, charge=charge)
                            group.append(atom)
                            res.add_atom(atom)
                        elif line.strip() and line.split()[0].upper() in ("BOND", "DOUBLE"):
                            it = iter([w.upper() for w in line.split()[1:]])
                            for a1, a2 in zip(it, it):
                                # A "-" prefix marks an atom of the previous
                                # residue, "+" one of the next residue; the
                                # partner atom becomes this residue's head/tail
                                if a1.startswith("-"):
                                    res.head = res[a2]
                                    continue
                                if a2.startswith("-"):
                                    res.head = res[a1]
                                    continue
                                if a1.startswith("+"):
                                    res.tail = res[a2]
                                    continue
                                if a2.startswith("+"):
                                    res.tail = res[a1]
                                    continue
                                # Apparently PRES objects do not need to put +
                                # or - in front of atoms that belong to adjacent
                                # residues
                                if restype == "PRES" and (a1 not in res or a2 not in res):
                                    continue
                                res.add_bond(a1, a2)
                        elif line[:4].upper() == "CMAP":
                            pass
                        elif line[:5].upper() == "DONOR":
                            pass
                        elif line[:6].upper() == "ACCEPT":
                            pass
                        elif line[:2].upper() == "IC":
                            words = line.split()[1:]
                            ictable.append((
                                [w.upper() for w in words[:4]],
                                [float(w) for w in words[4:]],
                            ))
                        elif line[:3].upper() == "END":
                            break
                        elif line[:5].upper() == "PATCH":
                            it = iter(line.split()[1:])
                            for tok, val in zip(it, it):
                                if val.upper() == "NONE":
                                    val = None
                                if tok.upper().startswith("FIRS"):
                                    hpatches[resname] = val
                                elif tok.upper().startswith("LAST"):
                                    tpatches[resname] = val
                        elif line[:6].upper() == "DELETE":
                            # A 6-character slice is needed to match "DELETE"
                            pass
                        elif line[:4].upper() == "IMPR":
                            it = iter([w.upper() for w in line.split()[1:]])
                            for a1, a2, a3, a4 in zip(it, it, it, it):
                                # Check the prefix of all three partner atoms
                                if (a2.startswith("-") or a3.startswith("-")
                                        or a4.startswith("-")):
                                    res.head = res[a1]
                        elif line[:4].upper() in ("RESI", "PRES", "MASS"):
                            # Leave this line for the outer loop and bail
                            break
                        line = next(f, None)
                    if group:
                        res.groups.append(group)
                    _fit_IC_table(res, ictable)
                    if restype == "RESI":
                        residues[resname] = res
                    else:
                        patches[resname] = res
                    # We parsed a line we need to look at. So don't update the
                    # iterator
                    continue
                # Get the next line and cycle through
                line = next(f, None)
        finally:
            # Release the handle we opened even if parsing raised
            if own_handle:
                f.close()

        # Go through the patches and add the appropriate one
        for resname, res in iteritems(residues):
            if hpatches[resname] is not None:
                try:
                    res.first_patch = patches[hpatches[resname]]
                except KeyError:
                    warnings.warn("Patch %s not found" % hpatches[resname])
            if tpatches[resname] is not None:
                try:
                    res.last_patch = patches[tpatches[resname]]
                except KeyError:
                    warnings.warn("Patch %s not found" % tpatches[resname])
        # Now update the residues and patches with the ones we parsed here
        self.residues.update(residues)
        self.patches.update(patches)
Beispiel #2
0
    def parse(filename, structure=False):
        """ Parses a mol2 (or mol3) file

        Parameters
        ----------
        filename : str or file-like
            Name of the file to parse or file-like object to parse from. A
            file-like object supplied by the caller is not closed.
        structure : bool, optional
            If True, the return value is a :class:`Structure` instance. If
            False, it is either a :class:`ResidueTemplate` or
            :class:`ResidueTemplateContainer` instance, depending on whether
            there is one or more than one residue defined in it. Default is
            False

        Returns
        -------
        molecule : :class:`Structure`, :class:`ResidueTemplate`, or
                   :class:`ResidueTemplateContainer`
            The molecule defined by this mol2 file

        Raises
        ------
        Mol2Error
            If the file format is not recognized or non-numeric values are
            present where integers or floating point numbers are expected. Also
            raises Mol2Error if you try to parse a mol2 file that has multiple
            @<MOLECULE> entries with ``structure=True``.
        """
        if isinstance(filename, string_types):
            f = genopen(filename, 'r')
            own_handle = True
        else:
            f = filename
            own_handle = False
        rescont = ResidueTemplateContainer()
        struct = Structure()
        restemp = ResidueTemplate()
        mol_info = []
        multires_structure = False
        try:
            section = None
            last_residue = None
            headtail = 'head'
            molecule_number = 0
            for line in f:
                if line.startswith('#'): continue
                if not line.strip() and section is None: continue
                if line.startswith('@<TRIPOS>'):
                    section = line[9:].strip()
                    if section == 'MOLECULE' and (restemp.atoms or rescont):
                        if structure:
                            raise Mol2Error('Cannot convert MOL2 with multiple '
                                            '@<MOLECULE>s to a Structure')
                        # Set the residue name from the MOL2 title if the
                        # molecule had only 1 residue and it was given a name in
                        # the title. mol_info is empty if the previous atoms
                        # were not preceded by a MOLECULE section.
                        if not multires_structure and mol_info and mol_info[0]:
                            restemp.name = mol_info[0]
                        multires_structure = False
                        rescont.append(restemp)
                        restemp = ResidueTemplate()
                        struct = Structure()
                        last_residue = None
                        molecule_number += 1
                        mol_info = []
                    continue
                if section is None:
                    raise Mol2Error('Bad mol2 file format')
                if section == 'MOLECULE':
                    # Section formatted as follows:
                    #   mol_name
                    #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                    #   mol_type
                    #   charge_type
                    #   [status_bits]
                    #   [mol_comment]
                    # TODO: Do something with the name.
                    if len(mol_info) == 0:
                        mol_info.append(line.strip())
                    elif len(mol_info) == 1:
                        mol_info.append([int(x) for x in line.split()])
                    elif len(mol_info) == 2:
                        mol_info.append(line.strip())
                    elif len(mol_info) == 3:
                        mol_info.append(line.strip())
                    # Ignore the rest
                    continue
                if section == 'ATOM':
                    # Section formatted as follows:
                    #   atom_id -- serial number of atom
                    #   atom_name -- name of the atom
                    #   x -- X-coordinate of the atom
                    #   y -- Y-coordinate of the atom
                    #   z -- Z-coordinate of the atom
                    #   atom_type -- type of the atom
                    #   subst_id -- Residue serial number
                    #   subst_name -- Residue name
                    #   charge -- partial atomic charge
                    #   status_bit -- ignored
                    words = line.split()
                    atom_id = int(words[0])
                    name = words[1]
                    x = float(words[2])
                    y = float(words[3])
                    z = float(words[4])
                    typ = words[5]
                    try:
                        resid = int(words[6])
                    except IndexError:
                        resid = 0
                    try:
                        resname = words[7]
                    except IndexError:
                        resname = 'UNK'
                    if 'NO_CHARGES' not in mol_info:
                        try:
                            charge = float(words[8])
                        except IndexError:
                            charge = 0
                    else:
                        charge = 0
                    if last_residue is None:
                        last_residue = (resid, resname)
                        restemp.name = resname
                    atom = Atom(name=name, type=typ, number=atom_id,
                                charge=charge)
                    atom.xx, atom.xy, atom.xz = x, y, z
                    struct.add_atom(atom, resname, resid)
                    if last_residue != (resid, resname):
                        # A new residue starts here: archive the finished
                        # template and begin a fresh one
                        rescont.append(restemp)
                        restemp = ResidueTemplate()
                        restemp.name = resname
                        last_residue = (resid, resname)
                        multires_structure = True
                    restemp.add_atom(copy.copy(atom))
                    continue
                if section == 'BOND':
                    # Section formatted as follows:
                    #   bond_id -- serial number of bond (ignored)
                    #   origin_atom_id -- serial number of first atom in bond
                    #   target_atom_id -- serial number of other atom in bond
                    #   bond_type -- string describing bond type (ignored)
                    #   status_bits -- ignored
                    words = line.split()
                    int(words[0]) # Bond serial number... redundant and ignored
                    a1 = int(words[1])
                    a2 = int(words[2])
                    atom1 = struct.atoms.find_original_index(a1)
                    atom2 = struct.atoms.find_original_index(a2)
                    struct.bonds.append(Bond(atom1, atom2))
                    # Now add it to our residue container
                    # See if it's a head/tail connection
                    if atom1.residue is not atom2.residue:
                        # The template still being built (restemp) is not in
                        # rescont yet, so its index equals len(rescont)
                        if atom1.residue.idx == len(rescont):
                            res1 = restemp
                        elif atom1.residue.idx < len(rescont):
                            res1 = rescont[atom1.residue.idx]
                        else:
                            raise Mol2Error('Bad bonding pattern detected')
                        if atom2.residue.idx == len(rescont):
                            res2 = restemp
                        elif atom2.residue.idx < len(rescont):
                            res2 = rescont[atom2.residue.idx]
                        else:
                            raise Mol2Error('Bad bonding pattern detected')
                        assert res1 is not res2, 'BAD identical residues'
                        idx1 = atom1.idx - atom1.residue[0].idx
                        idx2 = atom2.idx - atom2.residue[0].idx
                        if atom1.residue.idx < atom2.residue.idx:
                            res1.tail = res1[idx1]
                            res2.head = res2[idx2]
                        else:
                            res1.head = res1[idx1]
                            res2.tail = res2[idx2]
                    elif not multires_structure:
                        restemp.add_bond(a1-1, a2-1)
                    else:
                        # Same residue, add the bond
                        offset = atom1.residue[0].idx
                        if atom1.residue.idx == len(rescont):
                            res = restemp
                        else:
                            res = rescont[atom1.residue.idx]
                        res.add_bond(atom1.idx-offset, atom2.idx-offset)
                    continue
                if section == 'CRYSIN':
                    # Section formatted as follows:
                    #   a -- length of first unit cell vector
                    #   b -- length of second unit cell vector
                    #   c -- length of third unit cell vector
                    #   alpha -- angle b/w b and c
                    #   beta -- angle b/w a and c
                    #   gamma -- angle b/w a and b
                    #   space group -- number of space group (ignored)
                    #   space group setting -- ignored
                    words = line.split()
                    box = [float(x) for x in words[:6]]
                    if len(box) != 6:
                        # Re-raised as Mol2Error by the handler below
                        raise ValueError('%d box dimensions found; needed 6' %
                                         len(box))
                    struct.box = copy.copy(box)
                    rescont.box = copy.copy(box)
                    continue
                if section == 'SUBSTRUCTURE':
                    # Section formatted as follows:
                    #   subst_id -- residue number
                    #   subst_name -- residue name
                    #   root_atom -- first atom of residue
                    #   subst_type -- ignored (usually 'RESIDUE')
                    #   dict_type -- type of substructure (ignored)
                    #   chain -- chain ID of residue
                    #   sub_type -- type of the chain
                    #   inter_bonds -- # of inter-substructure bonds
                    #   status -- ignored
                    #   comment -- ignored
                    words = line.split()
                    if not words: continue
                    subst_id = int(words[0])
                    resname = words[1]
                    try:
                        chain = words[5]
                    except IndexError:
                        chain = ''
                    # Set the chain ID
                    for res in struct.residues:
                        if res.number == subst_id and res.name == resname:
                            res.chain = chain
                    continue
                # MOL3 sections
                if section == 'HEADTAIL':
                    atname, residx = line.split()
                    residx = int(residx)
                    if residx in (0, 1) or residx - 1 == len(rescont):
                        res = restemp
                    elif residx - 1 < len(rescont):
                        res = rescont[residx-1]
                    else:
                        raise Mol2Error('Residue out of range in head/tail')
                    # Lines alternate head, tail, head, ...; the toggle
                    # advances even when the named atom is not found
                    for atom in res:
                        if atom.name == atname:
                            if headtail == 'head':
                                res.head = atom
                                headtail = 'tail'
                            else:
                                res.tail = atom
                                headtail = 'head'
                            break
                    else:
                        if headtail == 'head':
                            headtail = 'tail'
                        else:
                            headtail = 'head'
                    continue
                if section == 'RESIDUECONNECT':
                    words = line.split()
                    residx = int(words[0])
                    if residx - 1 == len(rescont):
                        res = restemp
                    elif residx - 1 < len(rescont):
                        res = rescont[residx-1]
                    else:
                        raise Mol2Error('Residue out of range in '
                                        'residueconnect')
                    for a in words[3:]:
                        if a == '0': continue
                        for atom in res:
                            if atom.name == a:
                                # Record the connection atom on the residue
                                res.connections.append(atom)
                                break
                        else:
                            raise Mol2Error('Residue connection atom %s not '
                                            'found in residue %d' % (a, residx))
            if structure:
                return struct
            elif len(rescont) > 0:
                if not multires_structure and mol_info and mol_info[0]:
                    restemp.name = mol_info[0]
                rescont.append(restemp)
                return rescont
            else:
                return restemp
        except ValueError as e:
            raise Mol2Error('String conversion trouble: %s' % e)
        finally:
            if own_handle: f.close()