Example #1
0
    def _write_residue(dest, res):
        """ Writes a residue to an open file handle

        The unit is written as consecutive ``!entry.<name>.unit.*`` sections:
        atoms, atomspertinfo, boundbox, childsequence, connect, connectivity,
        hierarchy, name, positions, residueconnect, residues,
        residuesPdbSequenceNumber, solventcap and velocities.

        Parameters
        ----------
        dest : file-like
            File object to write the residue information to
        res : :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
            The residue template (or template container) to write to the file

        Raises
        ------
        ValueError
            If ``res.box`` is set and its three angles are not all equal
        """
        if isinstance(res, ResidueTemplate):
            # Put it into a template container with the same name
            tmp = ResidueTemplateContainer(res.name)
            tmp.append(res)
            res = tmp
        dest.write('!entry.%s.unit.atoms table  str name  str type  int typex  '
                   'int resx  int flags  int seq  int elmnt  dbl chg\n' %
                   res.name)
        for i, r in enumerate(res):
            for atom in r:
                dest.write(' "%s" "%s" 0 %d 131072 %d %d %.6f\n' % (atom.name,
                           atom.type, i+1, atom.idx+1, atom.atomic_number,
                           atom.charge))
        dest.write('!entry.%s.unit.atomspertinfo table  str pname  str ptype  '
                   'int ptypex  int pelmnt  dbl pchg\n' % res.name)
        for r in res:
            for atom in r:
                dest.write(' "%s" "%s" 0 -1 0.0\n' % (atom.name, atom.type))
        dest.write('!entry.%s.unit.boundbox array dbl\n' % res.name)
        if res.box is None:
            # No box: a negative flag followed by four placeholder values
            dest.write((' -1.000000\n' + ' 0.0\n' * 4))
        else:
            dest.write(' 1.000000\n')
            # The section stores a single angle, so all three must agree
            if res.box[3] == res.box[4] == res.box[5]:
                dest.write(' %f\n' % res.box[3])
            else:
                raise ValueError('Cannot write boxes with different angles')
            dest.write(' %f\n' % res.box[0])
            dest.write(' %f\n' % res.box[1])
            dest.write(' %f\n' % res.box[2])
        dest.write('!entry.%s.unit.childsequence single int\n %d\n' %
                   (res.name, len(res)+1))
        dest.write('!entry.%s.unit.connect array int\n' % res.name)
        if len(res) > 1:
            # Head and tail are only written for single-residue units
            dest.write(' 0\n 0\n')
        else:
            if res[0].head is not None:
                dest.write(' %d\n' % (res[0].head.idx + 1))
            else:
                dest.write(' 0\n')
            if res[0].tail is not None:
                dest.write(' %d\n' % (res[0].tail.idx + 1))
            else:
                dest.write(' 0\n')
        dest.write('!entry.%s.unit.connectivity table  int atom1x  int atom2x  '
                   'int flags\n' % res.name)
        # This table numbers atoms across the whole unit (the reader subtracts
        # each residue's first-atom offset again), so shift every bond by the
        # number of atoms that belong to the residues written before it
        base = 1
        for r in res:
            for bond in r.bonds:
                dest.write(' %d %d 1\n' % (bond.atom1.idx+base,
                                           bond.atom2.idx+base))
            base += len(r)
        dest.write('!entry.%s.unit.hierarchy table  str abovetype  int '
                   'abovex  str belowtype  int belowx\n' % res.name)
        c = 1
        for i, r in enumerate(res):
            dest.write(' "U" 0 "R" %d\n' % (i+1))
            for atom in r:
                dest.write(' "R" %d "A" %d\n' % (i+1, c))
                c += 1
        dest.write('!entry.%s.unit.name single str\n' % res.name)
        dest.write(' "%s"\n' % res.name)
        dest.write('!entry.%s.unit.positions table  dbl x  dbl y  dbl z\n' %
                   res.name)
        for r in res:
            for atom in r:
                dest.write(' %.6g %.6g %.6g\n' % (atom.xx, atom.xy, atom.xz))
        dest.write('!entry.%s.unit.residueconnect table  int c1x  int c2x  '
                   'int c3x  int c4x  int c5x  int c6x\n' % res.name)
        for r in res:
            # Make the CONECT1 and 0 default to 1 so that the TREE gets set
            # correctly by tleap. Not used for anything else...
            conn = [1, 1, 0, 0, 0, 0]
            if r.head is not None: conn[0] = r.head.idx + 1
            if r.tail is not None: conn[1] = r.tail.idx + 1
            # Only columns c3x-c6x are left for extra connections; anything
            # past the fourth cannot be represented in this table
            for i, at in enumerate(r.connections[:4]):
                conn[i+2] = at.idx + 1
            dest.write(' %d %d %d %d %d %d\n' % tuple(conn))
        dest.write('!entry.%s.unit.residues table  str name  int seq  int '
                   'childseq  int startatomx  str restype  int imagingx\n' %
                   res.name)
        c = 1
        for i, r in enumerate(res):
            if r.type is PROTEIN:
                typ = 'p'
            elif r.type is NUCLEIC:
                typ = 'n'
            elif r.type is SOLVENT:
                typ = 'w'
            elif r.type is UNKNOWN:
                typ = '?'
            else:
                warnings.warn('Unrecognized residue type %r' % r.type,
                              AmberWarning)
                typ = '?'
            dest.write(' "%s" %d %d %d "%s" %d\n' % (r.name, i+1, 1+len(r), c,
                       typ, _imaging_atom(r)))
            c += len(r)
        dest.write('!entry.%s.unit.residuesPdbSequenceNumber array int\n' %
                   res.name)
        for i, r in enumerate(res):
            # A lone residue is numbered 0; otherwise residues count from 1
            if len(res) == 1:
                dest.write(' 0\n')
            else:
                dest.write(' %d\n' % (i+1))
        dest.write('!entry.%s.unit.solventcap array dbl\n' % res.name)
        dest.write(' -1.000000\n' + ' 0.0\n' * 4)
        dest.write('!entry.%s.unit.velocities table  dbl x  dbl y  dbl z\n' %
                   res.name)
        for r in res:
            for atom in r:
                # Atoms without vx/vy/vz attributes get zero velocities
                try:
                    s = ' %g %g %g\n' % (atom.vx, atom.vy, atom.vz)
                except AttributeError:
                    dest.write(' 0.0 0.0 0.0\n')
                else:
                    dest.write(s)
Example #2
0
    def _parse_residue(fileobj, name):
        """
        Parses the residue information out of the OFF file assuming the file
        is pointed at the first line of an atoms table section of the OFF file

        Parameters
        ----------
        fileobj : file-like
            Assumed to be open for read and positioned on the first row of the
            atoms table. It is consumed through the last row of this unit's
            velocities table
        name : str
            The name of the residue being processed right now

        Returns
        -------
        :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
            The single template when the unit holds one residue, otherwise the
            container holding every residue of the unit

        Raises
        ------
        RuntimeError
            If a section header is missing, out of order or names a different
            unit, if the file ends inside a table, or if the boundbox or
            connect entries are not numeric
        """
        def expect_section(regex, header, message):
            # A header must match the section pattern AND belong to this unit
            rematch = regex.match(header)
            if not rematch:
                raise RuntimeError(message)
            found = rematch.groups()[0]
            if found != name:
                raise RuntimeError('Found residue %s while processing residue '
                                   '%s' % (found, name))

        def skip_table(line):
            # Discards rows up to the next '!' header line, which is returned
            while not line.startswith('!'):
                if not line:
                    raise RuntimeError('Unexpected EOF in Amber OFF library')
                line = fileobj.readline()
            return line

        container = ResidueTemplateContainer(name)
        nres = 1
        templ = ResidueTemplate(name)
        line = fileobj.readline()
        # startswith() is False for '' (EOF), so a truncated file reaches the
        # explicit EOF check instead of failing on line[0]
        while not line.startswith('!'):
            if not line:
                raise RuntimeError('Unexpected EOF in Amber OFF library')
            nam, typ, typx, resx, flags, seq, elmnt, chg = line.split()
            nam = _strip_enveloping_quotes(nam)
            typ = _strip_enveloping_quotes(typ)
            # typex, flags and seq are not stored, but converting them still
            # rejects rows whose integer columns are malformed
            for field in (typx, flags, seq):
                int(field)
            resx = int(resx)
            atom = Atom(atomic_number=int(elmnt), type=typ, name=nam,
                        charge=float(chg))
            if resx == nres + 1:
                # Residue index moved on: close this template, start the next
                container.append(templ)
                nres += 1
                templ = ResidueTemplate(name)
            templ.add_atom(atom)
            line = fileobj.readline()
        container.append(templ)
        # Unit-wide (0-based) index of the first atom of every residue
        start_atoms = []
        runsum = 0
        for res in container:
            start_atoms.append(runsum)
            runsum += len(res)
        # Make sure we get the next section
        expect_section(AmberOFFLibrary._sec2re, line,
                       'Expected pertinfo table not found')
        # Not used, just skip
        # TODO sanity check
        line = skip_table(fileobj.readline())
        expect_section(AmberOFFLibrary._sec3re, line,
                       'Expected boundbox table not found')
        # Only 5 lines
        try:
            hasbox = float(fileobj.readline().strip())
            angle = float(fileobj.readline().strip())
            a = float(fileobj.readline().strip())
            b = float(fileobj.readline().strip())
            c = float(fileobj.readline().strip())
        except ValueError:
            raise RuntimeError('Error processing boundbox table entries')
        if hasbox > 0:
            if angle < 3.15:
                # No box is this acute -- must be in radians. Larger values
                # are taken to be degrees already, which is what the writer
                # in this file emits, so a round trip keeps the angle intact
                angle *= RAD_TO_DEG
            container.box = [a, b, c, angle, angle, angle]
        # Get the child sequence entry
        line = fileobj.readline()
        expect_section(AmberOFFLibrary._sec4re, line,
                       'Expected childsequence table not found')
        n = int(fileobj.readline().strip())
        if nres + 1 != n:
            warnings.warn('Unexpected childsequence (%d); expected %d for '
                          'residue %s' % (n, nres+1, name), AmberWarning)
        # Get the CONNECT array to set head and tail
        line = fileobj.readline()
        expect_section(AmberOFFLibrary._sec5re, line,
                       'Expected connect array not found')
        try:
            head = int(fileobj.readline().strip())
            tail = int(fileobj.readline().strip())
        except ValueError:
            raise RuntimeError('Error processing connect table entries')
        natom = sum(len(r) for r in container)
        if head > 0:
            if nres == 1:
                templ.head = templ[head-1]
            elif head < natom:
                raise RuntimeError('HEAD on multi-residue unit not supported')
        if tail > 0:
            if nres == 1:
                templ.tail = templ[tail-1]
            elif tail < natom:
                warnings.warn('TAIL on multi-residue unit not supported (%s). '
                              'Ignored...' % name, AmberWarning)
        # Get the connectivity array to set bonds
        line = fileobj.readline()
        expect_section(AmberOFFLibrary._sec6re, line,
                       'Expected connectivity table not found')
        line = fileobj.readline()
        while not line.startswith('!'):
            if not line:
                raise RuntimeError('Unexpected EOF in Amber OFF library')
            idx1, idx2, flag = line.split()
            idx1 = int(idx1) - 1
            idx2 = int(idx2) - 1
            if nres > 1:
                # Find which residue we belong in: the one just before the
                # first residue whose starting atom lies past idx1
                ires = len(start_atoms) - 1
                for k, first in enumerate(start_atoms):
                    if first > idx1:
                        ires = k - 1
                        break
                offset = start_atoms[ires]
                container[ires].add_bond(idx1-offset, idx2-offset)
            else:
                templ.add_bond(idx1, idx2)
            line = fileobj.readline()
        # Get the hierarchy table
        expect_section(AmberOFFLibrary._sec7re, line,
                       'Expected hierarchy table not found')
        # Skip this section... not used
        # TODO turn this into a sanity check
        line = skip_table(fileobj.readline())
        # Get the unit name
        expect_section(AmberOFFLibrary._sec8re, line,
                       'Expected unit name string not found')
        fileobj.readline() # Skip this... not used
        line = fileobj.readline()
        # Get the atomic positions
        expect_section(AmberOFFLibrary._sec9re, line,
                       'Expected unit positions table not found')
        for res in container:
            for atom in res:
                x, y, z = fileobj.readline().split()
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
        line = fileobj.readline()
        # Get the residueconnect table
        expect_section(AmberOFFLibrary._sec10re, line,
                       'Expected unit residueconnect table not found')
        for _ in range(nres):
            c1,c2,c3,c4,c5,c6 = [int(x) for x in fileobj.readline().split()]
            # A 0 column means "no atom"; without the > 0 guard, index -1
            # would silently compare against the last atom of the template
            if (c1 > 0 and templ.head is not None and
                    templ.head is not templ[c1-1]):
                warnings.warn('HEAD atom is not connect0')
            if (c2 > 0 and templ.tail is not None and
                    templ.tail is not templ[c2-1]):
                warnings.warn('TAIL atom is not connect1')
            for cx in (c3, c4, c5, c6):
                if cx == 0: continue
                templ.connections.append(templ[cx-1])
        # Get the residues table
        line = fileobj.readline()
        expect_section(AmberOFFLibrary._sec11re, line,
                       'Expected unit residues table not found')
        for i in range(nres):
            resname, seqid, childseq, start, typ, img = \
                    fileobj.readline().split()
            resname = _strip_enveloping_quotes(resname)
            typ = _strip_enveloping_quotes(typ)
            # Not stored, but still required to be integers
            for field in (seqid, img):
                int(field)
            predicted = int(childseq) - int(start)
            if predicted != len(container[i]):
                warnings.warn('residue table predicted %d, not %d atoms for '
                              'residue %s' % (predicted, len(container[i]),
                              name), AmberWarning)
            if typ == 'p':
                container[i].type = PROTEIN
            elif typ == 'n':
                container[i].type = NUCLEIC
            elif typ == 'w':
                container[i].type = SOLVENT
            elif typ != '?':
                warnings.warn('Unknown residue type "%s"' % typ,
                              AmberWarning)
            if nres > 1:
                container[i].name = resname
        # Get the residues sequence table
        line = fileobj.readline()
        expect_section(AmberOFFLibrary._sec12re, line,
                       'Expected residue sequence number not found')
        for _ in range(nres):
            #TODO sanity check
            fileobj.readline()
        line = fileobj.readline()
        # Get the solventcap array
        expect_section(AmberOFFLibrary._sec13re, line,
                       'Expected unit solventcap array not found')
        # Ignore the solvent cap (always 5 entries)
        for _ in range(5):
            fileobj.readline()
        # Velocities
        line = fileobj.readline()
        expect_section(AmberOFFLibrary._sec14re, line,
                       'Expected unit velocities table not found')
        for res in container:
            for atom in res:
                vx, vy, vz = [float(x) for x in fileobj.readline().split()]
                atom.vx, atom.vy, atom.vz = vx, vy, vz

        if nres > 1:
            return container
        return templ
Example #3
0
    def _write_residue(dest, res):
        """ Serializes one unit (residue or residue container) in OFF format

        The ``!entry.<name>.unit.*`` sections are emitted in a fixed order:
        atoms, atomspertinfo, boundbox, childsequence, connect, connectivity
        (only when some residue has more than one atom), hierarchy, name,
        positions, residueconnect, residues, residuesPdbSequenceNumber,
        solventcap and velocities.

        Parameters
        ----------
        dest : file-like
            Open, writable object; only its ``write`` method is used
        res : :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
            The template to write. A lone template is first wrapped in a
            container carrying the same name

        Raises
        ------
        RuntimeError
            If the unit has a box whose three angles are not all equal
        """
        if isinstance(res, ResidueTemplate):
            # Treat a lone template as a one-member container
            wrapper = ResidueTemplateContainer(res.name)
            wrapper.append(res)
            res = wrapper
        emit = dest.write
        unit = res.name

        # --- atoms
        emit('!entry.%s.unit.atoms table  str name  str type  int typex  '
             'int resx  int flags  int seq  int elmnt  dbl chg\n' % unit)
        for resnum, templ in enumerate(res, 1):
            for atom in templ:
                row = (atom.name, atom.type, resnum, atom.idx+1,
                       atom.atomic_number, atom.charge)
                emit(' "%s" "%s" 0 %d 131072 %d %d %.6f\n' % row)

        # --- perturbation info (name and type only, the rest is constant)
        emit('!entry.%s.unit.atomspertinfo table  str pname  str ptype  '
             'int ptypex  int pelmnt  dbl pchg\n' % unit)
        for templ in res:
            for atom in templ:
                emit(' "%s" "%s" 0 -1 0.0\n' % (atom.name, atom.type))

        # --- bounding box
        emit('!entry.%s.unit.boundbox array dbl\n' % unit)
        box = res.box
        if box is not None:
            emit(' 1.000000\n')
            # Only one angle can be stored, so the three must be identical
            if not (box[3] == box[4] == box[5]):
                raise RuntimeError('Cannot write boxes with different angles')
            emit(' %f\n' % box[3])
            for k in (0, 1, 2):
                emit(' %f\n' % box[k])
        else:
            emit(' -1.000000\n' + ' 0.0\n' * 4)

        # --- child sequence and unit-level head/tail
        emit('!entry.%s.unit.childsequence single int\n %d\n' %
             (unit, len(res)+1))
        emit('!entry.%s.unit.connect array int\n' % unit)
        if len(res) > 1:
            emit(' 0\n 0\n')
        else:
            head = res[0].head
            emit(' 0\n' if head is None else ' %d\n' % (head.idx + 1))
            tail = res[0].tail
            emit(' 0\n' if tail is None else ' %d\n' % (tail.idx + 1))

        # --- connectivity, omitted when every residue is a single atom
        if any(len(templ) > 1 for templ in res):
            emit('!entry.%s.unit.connectivity table  int atom1x  '
                 'int atom2x  int flags\n' % unit)
            # offset turns a per-residue atom index into a 1-based unit index
            offset = 1
            for templ in res:
                for bond in templ.bonds:
                    emit(' %d %d 1\n' % (bond.atom1.idx+offset,
                                         bond.atom2.idx+offset))
                offset += len(templ)

        # --- hierarchy: unit -> residue -> atom
        emit('!entry.%s.unit.hierarchy table  str abovetype  int '
             'abovex  str belowtype  int belowx\n' % unit)
        serial = 0
        for resnum, templ in enumerate(res, 1):
            emit(' "U" 0 "R" %d\n' % resnum)
            for _ in templ:
                serial += 1
                emit(' "R" %d "A" %d\n' % (resnum, serial))

        # --- unit name and coordinates
        emit('!entry.%s.unit.name single str\n' % unit)
        emit(' "%s"\n' % unit)
        emit('!entry.%s.unit.positions table  dbl x  dbl y  dbl z\n' % unit)
        for templ in res:
            for atom in templ:
                emit(' %.6g %.6g %.6g\n' % (atom.xx, atom.xy, atom.xz))

        # --- residueconnect
        emit('!entry.%s.unit.residueconnect table  int c1x  int c2x  '
             'int c3x  int c4x  int c5x  int c6x\n' % unit)
        first = 1
        for templ in res:
            natom = len(templ)
            # Make the CONECT1 and 0 default to first and last atom so that
            # the TREE gets set correctly by tleap
            ends = [first, first + natom - 1]
            if templ.head is not None:
                ends[0] = templ.head.idx + 1
            if templ.tail is not None:
                ends[1] = templ.tail.idx + 1
            # At most four extra connections fit; unused columns stay 0
            extra = [at.idx + 1 for at in templ.connections[:4]]
            extra.extend([0] * (4 - len(extra)))
            emit(' %d %d %d %d %d %d\n' % tuple(ends + extra))
            first += natom

        # --- residues
        emit('!entry.%s.unit.residues table  str name  int seq  int '
             'childseq  int startatomx  str restype  int imagingx\n' % unit)
        type_codes = ((PROTEIN, 'p'), (NUCLEIC, 'n'), (SOLVENT, 'w'),
                      (UNKNOWN, '?'))
        first = 1
        for resnum, templ in enumerate(res, 1):
            for kind, code in type_codes:
                if templ.type is kind:
                    typ = code
                    break
            else:
                warnings.warn('Unrecognized residue type %r' % templ.type,
                              AmberWarning)
                typ = '?'
            emit(' "%s" %d %d %d "%s" %d\n' %
                 (templ.name, resnum, 1+len(templ), first, typ,
                  _imaging_atom(templ)+first))
            first += len(templ)

        # --- PDB sequence numbers: 0 for a lone residue, else 1, 2, ...
        emit('!entry.%s.unit.residuesPdbSequenceNumber array int\n' % unit)
        for resnum, _ in enumerate(res, 1):
            emit(' 0\n' if len(res) == 1 else ' %d\n' % resnum)

        # --- solvent cap (never written) and velocities
        emit('!entry.%s.unit.solventcap array dbl\n' % unit)
        emit(' -1.000000\n' + ' 0.0\n' * 4)
        emit('!entry.%s.unit.velocities table  dbl x  dbl y  dbl z\n' % unit)
        for templ in res:
            for atom in templ:
                # Atoms lacking velocity attributes are written as at rest
                try:
                    row = ' %g %g %g\n' % (atom.vx, atom.vy, atom.vz)
                except AttributeError:
                    row = ' 0.0 0.0 0.0\n'
                emit(row)
Example #4
0
    def _parse_residue(fileobj, name):
        """
        Parses the residue information out of the OFF file assuming the file
        is pointed at the first line of an atoms table section of the OFF file

        Parameters
        ----------
        fileobj : file-like
            Assumed to be open for read, this file is parsed until the *next*
            atom table is read
        name : str
            The name of the residue being processed right now
        """
        container = ResidueTemplateContainer(name)
        nres = 1
        templ = ResidueTemplate(name)
        line = fileobj.readline()
        while line[0] != '!':
            nam, typ, typx, resx, flags, seq, elmnt, chg = line.split()
            nam = _strip_enveloping_quotes(nam)
            typ = _strip_enveloping_quotes(typ)
            typx = int(typx)
            resx = int(resx)
            flags = int(flags)
            seq = int(seq)
            elmnt = int(elmnt)
            chg = float(chg)
            atom = Atom(atomic_number=elmnt, type=typ, name=nam, charge=chg)
            if resx == nres + 1:
                container.append(templ)
                nres += 1
                templ = ResidueTemplate(name)
            templ.add_atom(atom)
            line = fileobj.readline()
            # Skip blank lines
            while line and not line.strip():
                line = fileobj.readline()
        container.append(templ)
        if nres > 1:
            start_atoms = []
            runsum = 0
            for res in container:
                start_atoms.append(runsum)
                runsum += len(res)
        # Make sure we get the next section
        rematch = AmberOFFLibrary._sec2re.match(line)
        if not rematch:
            raise RuntimeError('Expected pertinfo table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        line = fileobj.readline()
        while line[0] != '!':
            if not line:
                raise RuntimeError('Unexpected EOF in Amber OFF library')
            # Not used, just skip
            # TODO sanity check
            line = fileobj.readline()
        rematch = AmberOFFLibrary._sec3re.match(line)
        if not rematch:
            raise RuntimeError('Expected boundbox table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        # Only 5 lines
        try:
            hasbox = float(fileobj.readline().strip())
            angle = float(fileobj.readline().strip())
            a = float(fileobj.readline().strip())
            b = float(fileobj.readline().strip())
            c = float(fileobj.readline().strip())
        except ValueError:
            raise RuntimeError('Error processing boundbox table entries')
        else:
            if hasbox > 0:
                if angle < 3.15:
                    # No box is this acute -- must be in radians
                    angle *= RAD_TO_DEG
                container.box = [a, b, c, angle, angle, angle]
        # Get the child sequence entry
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec4re.match(line)
        if not rematch:
            raise RuntimeError('Expected childsequence table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        n = int(fileobj.readline().strip())
        if nres + 1 != n:
            warnings.warn('Unexpected childsequence (%d); expected %d for '
                          'residue %s' % (n, nres+1, name), AmberWarning)
        elif not isinstance(templ, ResidueTemplate) and n != len(templ) + 1:
            raise RuntimeError('child sequence must be 1 greater than the '
                               'number of residues in the unit')
        # Get the CONNECT array to set head and tail
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec5re.match(line)
        if not rematch:
            raise RuntimeError('Expected connect array not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        try:
            head = int(fileobj.readline().strip())
            tail = int(fileobj.readline().strip())
        except ValueError:
            raise RuntimeError('Error processing connect table entries')
        if head > 0 and nres == 1:
            templ.head = templ[head-1]
        elif head > 0 and nres > 1:
            if head < sum((len(r) for r in container)):
                raise RuntimeError('HEAD on multi-residue unit not supported')
        if tail > 0 and nres == 1:
            templ.tail = templ[tail-1]
        elif tail > 0 and nres > 1:
            if tail < sum((len(r) for r in container)):
                warnings.warn('TAIL on multi-residue unit not supported (%s). '
                              'Ignored...' % name, AmberWarning)
        # Get the connectivity array to set bonds
        line = fileobj.readline()
        if len(templ.atoms) > 1:
            rematch = AmberOFFLibrary._sec6re.match(line)
            if not rematch:
                raise RuntimeError('Expected connectivity table not found')
            elif rematch.groups()[0] != name:
                raise RuntimeError('Found residue %s while processing residue %s' %
                                   (rematch.groups()[0], name))
            line = fileobj.readline()
            while line[0] != '!':
                i, j, flag = line.split()
                line = fileobj.readline()
                if nres > 1:
                    # Find which residue we belong in
                    i = int(i) - 1
                    j = int(j) - 1
                    for ii, idx in enumerate(start_atoms):
                        if idx > i:
                            ii -= 1
                            break
                    start_idx = start_atoms[ii]
                    container[ii].add_bond(i-start_idx, j-start_idx)
                else:
                    templ.add_bond(int(i)-1, int(j)-1)
        # Get the hierarchy table
        rematch = AmberOFFLibrary._sec7re.match(line)
        if not rematch:
            raise RuntimeError('Expected hierarchy table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        line = fileobj.readline()
        while line[0] != '!':
            # Skip this section... not used
            # TODO turn this into a sanity check
            line = fileobj.readline()
        # Get the unit name
        rematch = AmberOFFLibrary._sec8re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit name string not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        fileobj.readline() # Skip this... not used
        line = fileobj.readline()
        # Get the atomic positions
        rematch = AmberOFFLibrary._sec9re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit positions table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for res in container:
            for atom in res:
                x, y, z = fileobj.readline().split()
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
        line = fileobj.readline()
        # Get the residueconnect table
        rematch = AmberOFFLibrary._sec10re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit residueconnect table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for i in range(nres):
            c1,c2,c3,c4,c5,c6 = (int(x) for x in fileobj.readline().split())
            if (c1 > 0 and templ.head is not None and
                    templ.head is not templ[c1-1]):
                raise RuntimeError('HEAD atom is not connect0')
            if (c2 > 0 and templ.tail is not None and
                    templ.tail is not templ[c2-1]):
                raise RuntimeError('TAIL atom is not connect1')
            for i in (c3, c4, c5, c6):
                if i == 0: continue
                templ.connections.append(templ[i-1])
        # Get the residues table
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec11re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit residues table not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for i in range(nres):
            resname, id, next, start, typ, img = fileobj.readline().split()
            resname = _strip_enveloping_quotes(resname)
            id = int(id)
            start = int(start)
            next = int(next)
            typ = _strip_enveloping_quotes(typ)
            img = int(img)
            if next - start != len(container[i]):
                warnings.warn('residue table predicted %d, not %d atoms for '
                              'residue %s' % (next-start, len(container[i]),
                              name), AmberWarning)
            if typ == 'p':
                container[i].type = PROTEIN
            elif typ == 'n':
                container[i].type = NUCLEIC
            elif typ == 'w':
                container[i].type = SOLVENT
            elif typ != '?':
                warnings.warn('Unknown residue type "%s"' % typ, AmberWarning)
            if nres > 1:
                container[i].name = resname
        # Get the residues sequence table
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec12re.match(line)
        if not rematch:
            raise RuntimeError('Expected residue sequence number not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for i in range(nres):
            #TODO sanity check
            fileobj.readline()
        line = fileobj.readline()
        # Get the solventcap array
        rematch = AmberOFFLibrary._sec13re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit solventcap array not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        # Ignore the solvent cap
        fileobj.readline()
        fileobj.readline()
        fileobj.readline()
        fileobj.readline()
        fileobj.readline()
        # Velocities
        line = fileobj.readline()
        rematch = AmberOFFLibrary._sec14re.match(line)
        if not rematch:
            raise RuntimeError('Expected unit solventcap array not found')
        elif rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))
        for res in container:
            for atom in res:
                vx, vy, vz = (float(x) for x in fileobj.readline().split())
                atom.vx, atom.vy, atom.vz = vx, vy, vz

        if nres > 1:
            return container
        return templ
Example #5
0
    def parse(filename, structure=False):
        """ Parses a mol2 (or mol3) file

        Parameters
        ----------
        filename : str or file-like
            Name of the file to parse or file-like object to parse from. A
            file opened here from a name is closed before returning; a handle
            passed in by the caller is left open.
        structure : bool, optional
            If True, the return value is a :class:`Structure` instance. If
            False, it is either a :class:`ResidueTemplate` or
            :class:`ResidueTemplateContainer` instance, depending on whether
            there is one or more than one residue defined in it. Default is
            False

        Returns
        -------
        molecule : :class:`Structure`, :class:`ResidueTemplate`, or
                   :class:`ResidueTemplateContainer`
            The molecule defined by this mol2 file

        Raises
        ------
        Mol2Error
            If the file format is not recognized or non-numeric values are
            present where integers or floating point numbers are expected. Also
            raises Mol2Error if you try to parse a mol2 file that has multiple
            @<MOLECULE> entries with ``structure=True``, or if a bond refers to
            a residue that has no corresponding template.
        """
        if isinstance(filename, string_types):
            f = genopen(filename, 'r')
            own_handle = True
        else:
            f = filename
            own_handle = False
        rescont = ResidueTemplateContainer()
        struct = Structure()
        restemp = ResidueTemplate()
        mol_info = []
        multires_structure = False
        # Number of templates in rescont that came from earlier @<MOLECULE>
        # entries. struct is recreated for every molecule, so its residue
        # indices restart at 0 while rescont keeps growing; this offset maps a
        # struct residue index onto the matching template index.
        res_offset = 0
        try:
            section = None
            last_residue = None
            headtail = 'head'
            for line in f:
                if line.startswith('#'): continue
                # Blank lines carry meaning only inside the MOLECULE record
                # (they stand for empty positional fields); everywhere else
                # they are padding and must not reach the field parsers.
                if not line.strip() and section != 'MOLECULE': continue
                if line.startswith('@<TRIPOS>'):
                    section = line[9:].strip()
                    if section == 'MOLECULE' and (restemp.atoms or rescont):
                        if structure:
                            raise Mol2Error('Cannot convert MOL2 with multiple '
                                            '@<MOLECULE>s to a Structure')
                        # Set the residue name from the MOL2 title if the
                        # molecule had only 1 residue and it was given a name in
                        # the title
                        if (not multires_structure and mol_info and
                                mol_info[0]):
                            restemp.name = mol_info[0]
                        multires_structure = False
                        rescont.append(restemp)
                        restemp = ResidueTemplate()
                        struct = Structure()
                        last_residue = None
                        mol_info = []
                        res_offset = len(rescont)
                    continue
                if section is None:
                    raise Mol2Error('Bad mol2 file format')
                if section == 'MOLECULE':
                    # Section formatted as follows:
                    #   mol_name
                    #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                    #   mol_type
                    #   charge_type
                    #   [status_bits]
                    #   [mol_comment]
                    if len(mol_info) == 0:
                        mol_info.append(line.strip())
                    elif len(mol_info) == 1:
                        mol_info.append([int(x) for x in line.split()])
                    elif len(mol_info) == 2:
                        mol_info.append(line.strip())
                    elif len(mol_info) == 3:
                        mol_info.append(line.strip())
                    # Ignore the rest
                    continue
                if section == 'ATOM':
                    # Section formatted as follows:
                    #   atom_id -- serial number of atom
                    #   atom_name -- name of the atom
                    #   x -- X-coordinate of the atom
                    #   y -- Y-coordinate of the atom
                    #   z -- Z-coordinate of the atom
                    #   atom_type -- type of the atom
                    #   subst_id -- Residue serial number
                    #   subst_name -- Residue name
                    #   charge -- partial atomic charge
                    #   status_bit -- ignored
                    words = line.split()
                    atom_id = int(words[0])
                    name = words[1]
                    x = float(words[2])
                    y = float(words[3])
                    z = float(words[4])
                    typ = words[5]
                    # The trailing columns are optional; fall back to defaults
                    try:
                        resid = int(words[6])
                    except IndexError:
                        resid = 0
                    try:
                        resname = words[7]
                    except IndexError:
                        resname = 'UNK'
                    if 'NO_CHARGES' not in mol_info:
                        try:
                            charge = float(words[8])
                        except IndexError:
                            charge = 0
                    else:
                        charge = 0
                    if last_residue is None:
                        last_residue = (resid, resname)
                        restemp.name = resname
                    atom = Atom(name=name, type=typ, number=atom_id,
                                charge=charge)
                    atom.xx, atom.xy, atom.xz = x, y, z
                    struct.add_atom(atom, resname, resid)
                    if last_residue != (resid, resname):
                        # A new residue starts here: bank the finished
                        # template and begin a fresh one
                        rescont.append(restemp)
                        restemp = ResidueTemplate()
                        restemp.name = resname
                        last_residue = (resid, resname)
                        multires_structure = True
                    try:
                        restemp.add_atom(copy.copy(atom))
                    except ValueError:
                        # Allow mol2 files being parsed as a Structure to have
                        # duplicate atom names
                        if not structure:
                            raise
                    continue
                if section == 'BOND':
                    # Section formatted as follows:
                    #   bond_id -- serial number of bond (ignored)
                    #   origin_atom_id -- serial number of first atom in bond
                    #   target_atom_id -- serial number of other atom in bond
                    #   bond_type -- string describing bond type (ignored)
                    #   status_bits -- ignored
                    words = line.split()
                    int(words[0]) # Bond serial number... redundant and ignored
                    a1 = int(words[1])
                    a2 = int(words[2])
                    atom1 = struct.atoms.find_original_index(a1)
                    atom2 = struct.atoms.find_original_index(a2)
                    struct.bonds.append(Bond(atom1, atom2))
                    # Now add it to our residue container
                    # See if it's a head/tail connection
                    if atom1.residue is not atom2.residue:
                        tidx1 = atom1.residue.idx + res_offset
                        tidx2 = atom2.residue.idx + res_offset
                        if tidx1 == len(rescont):
                            res1 = restemp
                        elif tidx1 < len(rescont):
                            res1 = rescont[tidx1]
                        else:
                            raise Mol2Error('Bad bond!')
                        if tidx2 == len(rescont):
                            res2 = restemp
                        elif tidx2 < len(rescont):
                            res2 = rescont[tidx2]
                        else:
                            raise Mol2Error('Bad bond!')
                        if res1 is res2:
                            raise Mol2Error('BAD identical residues')
                        # Index of each atom within its own residue
                        idx1 = atom1.idx - atom1.residue[0].idx
                        idx2 = atom2.idx - atom2.residue[0].idx
                        if atom1.residue.idx < atom2.residue.idx:
                            res1.tail = res1[idx1]
                            res2.head = res2[idx2]
                        else:
                            res1.head = res1[idx1]
                            res2.tail = res2[idx2]
                    elif not multires_structure:
                        if not structure:
                            restemp.add_bond(a1-1, a2-1)
                    else:
                        # Same residue, add the bond
                        offset = atom1.residue[0].idx
                        tidx = atom1.residue.idx + res_offset
                        if tidx == len(rescont):
                            res = restemp
                        else:
                            res = rescont[tidx]
                        res.add_bond(atom1.idx-offset, atom2.idx-offset)
                    continue
                if section == 'CRYSIN':
                    # Section formatted as follows:
                    #   a -- length of first unit cell vector
                    #   b -- length of second unit cell vector
                    #   c -- length of third unit cell vector
                    #   alpha -- angle b/w b and c
                    #   beta -- angle b/w a and c
                    #   gamma -- angle b/w a and b
                    #   space group -- number of space group (ignored)
                    #   space group setting -- ignored
                    words = line.split()
                    box = [float(w) for w in words[:6]]
                    if len(box) != 6:
                        raise ValueError('%d box dimensions found; needed 6' %
                                         len(box))
                    struct.box = copy.copy(box)
                    rescont.box = copy.copy(box)
                    continue
                if section == 'SUBSTRUCTURE':
                    # Section formatted as follows:
                    #   subst_id -- residue number
                    #   subst_name -- residue name
                    #   root_atom -- first atom of residue
                    #   subst_type -- ignored (usually 'RESIDUE')
                    #   dict_type -- type of substructure (ignored)
                    #   chain -- chain ID of residue
                    #   sub_type -- type of the chain
                    #   inter_bonds -- # of inter-substructure bonds
                    #   status -- ignored
                    #   comment -- ignored
                    words = line.split()
                    if not words: continue
                    subst_id = int(words[0])
                    resname = words[1]
                    try:
                        chain = words[5]
                    except IndexError:
                        chain = ''
                    # Set the chain ID
                    for res in struct.residues:
                        if res.number == subst_id and res.name == resname:
                            res.chain = chain
                    continue
                # MOL3 sections
                if section == 'HEADTAIL':
                    # Lines alternate head, tail, head, tail, ... A residue
                    # index of 0 or 1 always selects the template currently
                    # being built.
                    # NOTE(review): for a multi-residue molecule this sends the
                    # entries for residue 1 to the last residue -- confirm
                    # whether that is intended.
                    atname, residx = line.split()
                    residx = int(residx)
                    if residx in (0, 1) or residx - 1 == len(rescont):
                        res = restemp
                    elif residx - 1 < len(rescont):
                        res = rescont[residx-1]
                    else:
                        raise Mol2Error('Residue out of range in head/tail')
                    for atom in res:
                        if atom.name == atname:
                            if headtail == 'head':
                                res.head = atom
                                headtail = 'tail'
                            else:
                                res.tail = atom
                                headtail = 'head'
                            break
                    else:
                        # Atom not found (e.g. a "0 0" placeholder): nothing to
                        # assign, but still advance the head/tail alternation
                        if headtail == 'head':
                            headtail = 'tail'
                        else:
                            headtail = 'head'
                    continue
                if section == 'RESIDUECONNECT':
                    # NOTE(review): residx is compared against the cumulative
                    # template count, not a per-molecule count -- verify for
                    # files with several @<MOLECULE> entries.
                    words = line.split()
                    residx = int(words[0])
                    if residx - 1 == len(rescont):
                        res = restemp
                    elif residx - 1 < len(rescont):
                        res = rescont[residx-1]
                    else:
                        raise Mol2Error('Residue out of range in '
                                        'residueconnect')
                    # words[1] and words[2] are the head and tail names; only
                    # the remaining columns are extra connection atoms
                    for a in words[3:]:
                        if a == '0': continue
                        for atom in res:
                            if atom.name == a:
                                res.connections.append(atom)
                                break
                        else:
                            raise Mol2Error('Residue connection atom %s not '
                                            'found in residue %d' % (a, residx))
            if structure:
                return struct
            elif len(rescont) > 0:
                if not multires_structure and mol_info and mol_info[0]:
                    restemp.name = mol_info[0]
                rescont.append(restemp)
                return rescont
            else:
                return restemp
        except ValueError as e:
            raise Mol2Error('String conversion trouble: %s' % e)
        finally:
            if own_handle: f.close()
Example #6
0
    def write(struct, dest, mol3=False, split=False):
        """ Writes a mol2 file from a structure or residue template

        Parameters
        ----------
        struct : :class:`Structure` or :class:`ResidueTemplate` or
                 :class:`ResidueTemplateContainer`
            The input structure to write the mol2 file from
        dest : str or file-like obj
            Name of the file to write or open file handle to write to. A file
            opened here from a name is closed before returning; a handle
            passed in by the caller is left open.
        mol3 : bool, optional
            If True, also write the HEADTAIL and RESIDUECONNECT sections.
            Default is False
        split : bool, optional
            If True and ``struct`` is a ResidueTemplateContainer or a Structure
            with multiple residues, each residue is printed in a separate
            @<MOLECULE> section that appear sequentially in the output file
        """
        own_handle = False
        if not hasattr(dest, 'write'):
            own_handle = True
            dest = genopen(dest, 'w')
        try:
            if split:
                # Write sequentially if it is a multi-residue container or
                # Structure; anything else falls through to a normal write
                pieces = None
                if isinstance(struct, ResidueTemplateContainer):
                    pieces = struct
                elif isinstance(struct, Structure) and len(struct.residues) > 1:
                    pieces = ResidueTemplateContainer.from_structure(struct)
                if pieces is not None:
                    for res in pieces:
                        Mol2File.write(res, dest, mol3)
                    return
            if isinstance(struct, ResidueTemplateContainer):
                natom = sum([len(c) for c in struct])
                # To find the number of bonds, we need to total number of bonds
                # + the number of bonds that would be formed by "stitching"
                # together residues via their head and tail
                bonds = []
                charges = []
                # bases[i] is the 1-based serial number of the first atom of
                # residue i in the combined atom list
                bases = []
                next_serial = 1
                for res in struct:
                    bases.append(next_serial)
                    next_serial += len(res)
                for i, res in enumerate(struct):
                    for bond in res.bonds:
                        bonds.append((bond.atom1.idx+bases[i],
                                      bond.atom2.idx+bases[i]))
                    if i < len(struct)-1 and (res.tail is not None and
                            struct[i+1].head is not None):
                        bonds.append((res.tail.idx+bases[i],
                                      struct[i+1].head.idx+bases[i+1]))
                    charges.extend([a.charge for a in res])
                residues = struct
                name = struct.name or struct[0].name
            else:
                natom = len(struct.atoms)
                bonds = [(b.atom1.idx+1, b.atom2.idx+1) for b in struct.bonds]
                if isinstance(struct, ResidueTemplate):
                    residues = [struct]
                    name = struct.name
                else:
                    residues = struct.residues
                    name = struct.residues[0].name
                charges = [a.charge for a in struct.atoms]
            dest.write('@<TRIPOS>MOLECULE\n')
            dest.write('%s\n' % name)
            dest.write('%d %d %d 0 1\n' % (natom, len(bonds), len(residues)))
            if len(residues) == 1:
                dest.write('SMALL\n')
            else:
                # The first residue with a recognized name decides the type
                for residue in residues:
                    if AminoAcidResidue.has(residue.name):
                        dest.write('PROTEIN\n')
                        break
                    if (RNAResidue.has(residue.name) or
                            DNAResidue.has(residue.name)):
                        dest.write('NUCLEIC\n')
                        break
                else:
                    dest.write('BIOPOLYMER\n')
            # The charge column is written only if some charge is non-zero
            printchg = any(charges)
            if printchg:
                dest.write('USER_CHARGES\n')
            else:
                dest.write('NO_CHARGES\n')
            # See if we want to print box info
            if hasattr(struct, 'box') and struct.box is not None:
                box = struct.box
                dest.write('@<TRIPOS>CRYSIN\n')
                dest.write('%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f  1  1\n' %
                           (box[0], box[1], box[2], box[3], box[4], box[5]))
            # Now do ATOM section
            dest.write('@<TRIPOS>ATOM\n')
            serial = 1
            for i, res in enumerate(residues):
                for atom in res:
                    # Atoms without coordinates are written at the origin
                    x = getattr(atom, 'xx', 0)
                    y = getattr(atom, 'xy', 0)
                    z = getattr(atom, 'xz', 0)
                    dest.write('%8d %-8s %10.4f %10.4f %10.4f %-8s %6d %-8s' % (
                               serial, atom.name, x, y, z,
                               atom.type.strip() or atom.name, i+1, res.name))
                    if printchg:
                        dest.write(' %10.6f\n' % atom.charge)
                    else:
                        dest.write('\n')
                    serial += 1
            dest.write('@<TRIPOS>BOND\n')
            for i, bond in enumerate(bonds):
                dest.write('%8d %8d %8d 1\n' % (i+1, bond[0], bond[1]))
            dest.write('@<TRIPOS>SUBSTRUCTURE\n')
            first_atom = 0
            for i, res in enumerate(residues):
                if not hasattr(res, 'chain') or not res.chain:
                    chain = '****'
                else:
                    chain = res.chain
                intresbonds = 0
                if isinstance(res, ResidueTemplate):
                    # Templates are only linked to neighbors via tail -> head
                    if i != len(residues)-1 and (res.tail is not None and
                            residues[i+1].head is not None):
                        intresbonds += 1
                    if i != 0 and (res.head is not None and residues[i-1].tail
                            is not None):
                        intresbonds += 1
                else:
                    for atom in res:
                        for a2 in atom.bond_partners:
                            if a2.residue is not res:
                                intresbonds += 1
                dest.write('%8d %-8s %8d RESIDUE %4d %-4s ROOT %6d\n' % (i+1,
                           res.name, first_atom+1, 0, chain[:4], intresbonds))
                first_atom += len(res)
            if mol3:
                dest.write('@<TRIPOS>HEADTAIL\n')
                for i, res in enumerate(residues):
                    if isinstance(res, ResidueTemplate):
                        head, tail = res.head, res.tail
                    else:
                        # Infer head/tail from bonds to the neighboring
                        # residues
                        head = tail = None
                        for atom in res:
                            for a2 in atom.bond_partners:
                                if a2.residue.idx == res.idx - 1:
                                    head = atom
                                if a2.residue.idx == res.idx + 1:
                                    tail = atom
                    # One line for the head, then one for the tail; "0 0"
                    # marks a missing one
                    for end in (head, tail):
                        if end is not None:
                            dest.write('%s %d\n' % (end.name, i+1))
                        else:
                            dest.write('0 0\n')
                dest.write('@<TRIPOS>RESIDUECONNECT\n')
                for i, res in enumerate(residues):
                    if isinstance(res, ResidueTemplate):
                        con = [res.head, res.tail, None, None, None, None]
                        # Use a separate index here: reusing ``i`` would
                        # overwrite the residue number written below
                        for slot, a in enumerate(res.connections):
                            con[slot+2] = a
                    else:
                        con = [None, None, None, None, None, None]
                        ncon = 2
                        for atom in res:
                            for a2 in atom.bond_partners:
                                if a2.residue.idx == res.idx - 1:
                                    con[0] = atom
                                elif a2.residue.idx == res.idx + 1:
                                    con[1] = atom
                                elif a2.residue.idx != res.idx:
                                    con[ncon] = atom
                                    ncon += 1
                    dest.write('%d' % (i+1))
                    for a in con:
                        if a is not None:
                            dest.write(' %s' % a.name)
                        else:
                            dest.write(' 0')
                    dest.write('\n')
        finally:
            if own_handle: dest.close()