예제 #1
0
 def test_read_atom(self):
     """Parse an ATOM record that has an altLoc flag and a negative residue number.

     Every field exposed by the parsed atom is compared with the value
     written in the record.
     """
     record = "ATOM     41  NH1AARG A  -3      12.218  84.840  88.007  0.50 40.76           N  "
     atom = pdb_model.PdbAtom(record)
     # Identification fields
     self.assertEqual(atom.serial, 41)
     self.assertEqual(atom.name, ' NH1')
     self.assertEqual(atom.altLoc, 'A')
     self.assertEqual(atom.resName, 'ARG')
     self.assertEqual(atom.chainID, 'A')
     self.assertEqual(atom.resSeq, -3)
     self.assertEqual(atom.iCode, None)
     # Coordinates
     self.assertEqual(atom.x, 12.218)
     self.assertEqual(atom.y, 84.840)
     self.assertEqual(atom.z, 88.007)
     # Occupancy, B-factor column and element
     self.assertEqual(atom.occupancy, 0.5)
     self.assertEqual(atom.tempFactor, 40.76)
     self.assertEqual(atom.element, 'N')
예제 #2
0
 def test_read_atom4(self):
     """Parse an ATOM record whose last columns read "O1-".

     Checks that this is reported as element 'O' with a charge of -1, along
     with the rest of the parsed fields.
     """
     record = "ATOM    183  OD2 ASP A  24      70.534  30.495  41.026  1.00 35.00           O1-"
     atom = pdb_model.PdbAtom(record)
     # Identification fields
     self.assertEqual(atom.serial, 183)
     self.assertEqual(atom.name, ' OD2')
     self.assertEqual(atom.altLoc, None)
     self.assertEqual(atom.resName, 'ASP')
     self.assertEqual(atom.chainID, 'A')
     self.assertEqual(atom.resSeq, 24)
     self.assertEqual(atom.iCode, None)
     # Coordinates
     self.assertEqual(atom.x, 70.534)
     self.assertEqual(atom.y, 30.495)
     self.assertEqual(atom.z, 41.026)
     # Occupancy, B-factor column, element and charge
     self.assertEqual(atom.occupancy, 1.00)
     self.assertEqual(atom.tempFactor, 35.00)
     self.assertEqual(atom.element, 'O')
     self.assertEqual(atom.charge, -1)
예제 #3
0
 def test_read_atom3(self):
     """Parse an ATOM record whose last columns read "N1+".

     Checks that this is reported as element 'N' with a charge of 1, along
     with the rest of the parsed fields.
     """
     record = "ATOM    160  NH1 ARG A  21      57.124  31.377  40.357  1.00 35.50           N1+"
     atom = pdb_model.PdbAtom(record)
     # Identification fields
     self.assertEqual(atom.serial, 160)
     self.assertEqual(atom.name, ' NH1')
     self.assertEqual(atom.altLoc, None)
     self.assertEqual(atom.resName, 'ARG')
     self.assertEqual(atom.chainID, 'A')
     self.assertEqual(atom.resSeq, 21)
     self.assertEqual(atom.iCode, None)
     # Coordinates
     self.assertEqual(atom.x, 57.124)
     self.assertEqual(atom.y, 31.377)
     self.assertEqual(atom.z, 40.357)
     # Occupancy, B-factor column, element and charge
     self.assertEqual(atom.occupancy, 1.00)
     self.assertEqual(atom.tempFactor, 35.50)
     self.assertEqual(atom.element, 'N')
     self.assertEqual(atom.charge, 1)
예제 #4
0
 def test_read_atom2(self):
     """Parse an ATOM record that carries a segment identifier ("AA--").

     Also covers negative coordinates and an atom name padded on both sides.
     """
     record = "ATOM     28  C   ALA A  12     -27.804  -2.987  10.849  1.00 11.75      AA-- C "
     atom = pdb_model.PdbAtom(record)
     # Identification fields
     self.assertEqual(atom.serial, 28)
     self.assertEqual(atom.name, ' C  ')
     self.assertEqual(atom.altLoc, None)
     self.assertEqual(atom.resName, 'ALA')
     self.assertEqual(atom.chainID, 'A')
     self.assertEqual(atom.resSeq, 12)
     self.assertEqual(atom.iCode, None)
     # Coordinates
     self.assertEqual(atom.x, -27.804)
     self.assertEqual(atom.y, -2.987)
     self.assertEqual(atom.z, 10.849)
     # Occupancy, B-factor column, segment id and element
     self.assertEqual(atom.occupancy, 1.00)
     self.assertEqual(atom.tempFactor, 11.75)
     self.assertEqual(atom.segID, 'AA--')
     self.assertEqual(atom.element, 'C')
예제 #5
0
def match_resseq(targetPdb=None, outPdb=None, resMap=None, sourcePdb=None):
    """Copy targetPdb to outPdb, renumbering the residues of its ATOM records.

    The resSeq of every ATOM record is passed through ``resMap.ref2target``.
    Records whose number comes back unchanged are copied verbatim; the others
    are re-written from the parsed atom with the new number. Every non-ATOM
    record (including TER and HETATM) is copied through unchanged.

    Args:
        targetPdb: path of the PDB file to read
        outPdb: path of the PDB file to write
        resMap: object providing ``ref2target(resSeq)``; give either this or
            sourcePdb, not both
        sourcePdb: path of a PDB file from which the map is built with
            ``residue_map.residueSequenceMap(targetPdb, sourcePdb)``

    Raises:
        RuntimeError: if the target contains a MODEL or an ANISOU record
    """
    assert sourcePdb or resMap
    assert not (sourcePdb and resMap)
    if not resMap:
        resMap = residue_map.residueSequenceMap(targetPdb, sourcePdb)

    with open(targetPdb, 'r') as target, open(outPdb, 'w') as out:
        for line in target:

            if line.startswith("MODEL"):
                raise RuntimeError("Multi-model file!")

            if line.startswith("ANISOU"):
                raise RuntimeError(
                    "I cannot cope with ANISOU! {0}".format(line))

            # Only ATOM records are renumbered. Everything else, TER included,
            # is passed straight through; processing does not stop at TER and
            # a change of chain is not treated specially.
            if not line.startswith("ATOM"):
                out.write(line)
                continue

            atom = pdb_model.PdbAtom(line)

            # Get the matching resSeq for the model
            modelResSeq = resMap.ref2target(atom.resSeq)
            if modelResSeq == atom.resSeq:
                # Unchanged number: keep the original text byte-for-byte
                out.write(line)
            else:
                atom.resSeq = modelResSeq
                out.write(atom.toLine() + "\n")
예제 #6
0
 def test_write_atom1(self):
     """Round-trip an ATOM record: parsing it and writing it back must reproduce the input."""
     original = "ATOM     41  NH1AARG A  -3      12.218  84.840  88.007  0.50 40.76           N  "
     regenerated = pdb_model.PdbAtom(original).toLine()
     self.assertEqual(regenerated, original)
예제 #7
0
def _group_atoms_by_residue(atomList):
    """Split the atoms of one chain into residues.

    Consecutive atoms that share a resSeq are taken to belong to one residue.

    Returns:
        A list of (resSeq, resName, atomNames) tuples in file order. resName
        is taken from the first atom of the residue and atomNames is the set
        of whitespace-stripped atom names found in it.
    """
    residues = []
    for atom in atomList:
        if not residues or atom.resSeq != residues[-1][0]:
            residues.append((atom.resSeq, atom.resName, set()))
        residues[-1][2].add(atom.name.strip())
    return residues


def get_info(inpath):
    """Read a PDB and extract as much information as possible into a PdbInfo object

    The following are filled in when the corresponding records are found:
    pdbCode (HEADER), title (first TITLE line), resolution (REMARK 2),
    solventContent and matthewsCoefficient (REMARK 280), crystalInfo (CRYST1)
    and models. Each model gets, per chain, the chain id, the list of atoms,
    the residue numbers, the one-letter sequence, and two masks with one entry
    per residue:
      caMask - True where the residue has no CA atom
      bbMask - True where the residue lacks any of N, CA, C, O, CB

    Args:
        inpath: path of the PDB file to read

    Returns:
        The populated pdb_model.PdbInfo object. If the file contains neither
        MODEL nor ATOM records, no model is added.

    Raises:
        KeyError: if a residue name is not in ample_util.three2one
    """

    info = pdb_model.PdbInfo()
    info.pdb = inpath

    currentModel = None
    currentChain = -1  # sentinel: no chain has been seen yet

    # List of models, each of which is a list of chains, each of which is the
    # list of atoms in that chain. Kept parallel to info.models.
    modelAtoms = []

    # readline() is used instead of iterating over the file because the REMARK
    # handling below reads ahead. The context manager closes the file even if
    # parsing raises.
    with open(inpath, 'r') as f:
        line = f.readline()
        while line:

            if line.startswith('HEADER'):
                info.pdbCode = line[62:66].strip()

            # First line of title
            if line.startswith('TITLE') and not info.title:
                info.title = line[10:-1].strip()

            if line.startswith("REMARK"):

                try:
                    numRemark = int(line[7:10])
                except ValueError:
                    line = f.readline()
                    continue

                # Resolution - looked for on the line after the first REMARK 2
                if numRemark == 2:
                    line = f.readline()
                    if line.find("RESOLUTION") != -1:
                        try:
                            info.resolution = float(line[25:30])
                        except ValueError:
                            # RESOLUTION. NOT APPLICABLE.
                            info.resolution = -1

                # Get solvent content
                if numRemark == 280:

                    maxread = 5
                    # Clunky - read up to maxread lines to see if we can get the information we're after
                    # We assume the floats are at the end of the lines
                    # NOTE(review): the lines read here are consumed; only the
                    # last one falls through to the checks below.
                    for _ in range(maxread):
                        line = f.readline()
                        if line.find("SOLVENT CONTENT") != -1:
                            try:
                                info.solventContent = float(line.split()[-1])
                            except ValueError:
                                # Leave as None
                                pass
                        if line.find("MATTHEWS COEFFICIENT") != -1:
                            try:
                                info.matthewsCoefficient = float(
                                    line.split()[-1])
                            except ValueError:
                                # Leave as None
                                pass
            # End REMARK

            if line.startswith("CRYST1"):
                try:
                    info.crystalInfo = pdb_model.CrystalInfo(line)
                except ValueError as e:
                    logger.critical(
                        "ERROR READING CRYST1 LINE in file %s\":%s\"\n%s",
                        inpath, line.rstrip(), e)
                    info.crystalInfo = None

            if line.startswith("MODEL"):
                if currentModel is not None:
                    # Need to make sure that we have an id if only 1 chain and none given.
                    # A model without any chain is left alone instead of
                    # failing on chains[0].
                    # NOTE(review): the atoms keep their blank chain id, so the
                    # chain-id sanity assert further down fails for a model
                    # renamed here - confirm the intended handling.
                    if (len(currentModel.chains) == 1
                            and currentModel.chains[0] is None):
                        currentModel.chains[0] = 'A'

                    info.models.append(currentModel)

                # New/first model
                currentModel = pdb_model.PdbModel()
                # Get serial
                currentModel.serial = int(line.split()[1])

                # NOTE(review): if PdbAtom reports a blank chain id as None, the
                # first atom of this model will not open a new chain - verify.
                currentChain = None
                modelAtoms.append([])

            # Count chains (could also check against the COMPND line if present?)
            if line.startswith('ATOM'):

                atom = pdb_model.PdbAtom(line)

                if currentModel is None:
                    # No MODEL record seen: this must be the first model and
                    # there should only be one
                    currentModel = pdb_model.PdbModel()
                    modelAtoms.append([])

                if atom.chainID != currentChain:
                    currentChain = atom.chainID
                    currentModel.chains.append(currentChain)
                    modelAtoms[-1].append([])

                modelAtoms[-1][-1].append(atom)

            # Can ignore TER and ENDMDL for time being as we'll pick up changing chains anyway,
            # and new models get picked up by the models line

            line = f.readline()

    # End of reading loop so add the last model to the list (if there was one)
    if currentModel is not None:
        info.models.append(currentModel)

    bbatoms = ['N', 'CA', 'C', 'O', 'CB']

    # Now process the atoms
    for modelIdx, model in enumerate(info.models):

        for chainIdx, atomList in enumerate(modelAtoms[modelIdx]):

            # Paranoid check
            assert model.chains[chainIdx] == atomList[0].chainID

            # Add list of atoms to model
            model.atoms.append(atomList)

            chainResSeqs = []
            chainSequence = []
            chainCaMask = []
            chainBbMask = []
            # Each residue is judged on its own atoms only, so the first atom
            # of a residue is never credited to the one before it and a
            # trailing single-atom residue is not lost.
            for resSeq, resName, names in _group_atoms_by_residue(atomList):
                chainResSeqs.append(resSeq)
                chainSequence.append(ample_util.three2one[resName])
                chainCaMask.append('CA' not in names)
                chainBbMask.append(any(bb not in names for bb in bbatoms))

            model.resSeqs.append(chainResSeqs)
            model.sequences.append("".join(chainSequence))
            model.caMask.append(chainCaMask)
            model.bbMask.append(chainBbMask)
    return info
예제 #8
0
def _keep_matching(refpdb=None, targetpdb=None, outpdb=None, resSeqMap=None):
    """Create a new pdb file that only contains the atoms in targetpdb that are
    also in refpdb. It only considers ATOM lines and discards HETATM lines in the target.

    Kept atoms are written residue by residue, in the atom order of the
    reference residue and with the residue number given by resSeqMap. All
    records other than ATOM, HETATM and TER are copied through unchanged; a
    TER record is replaced by a bare "TER" line.

    Args:
    refpdb: path to pdb that contains the minimal set of atoms we want to keep
        (read up to its first TER record)
    targetpdb: path to the pdb that will be stripped of non-matching atoms
    outpdb: output path for the stripped pdb
    resSeqMap: object providing ref2target() and target2ref(), used to
        translate residue numbers between the two files

    Raises:
    RuntimeError: on a MODEL record in either file, an ANISOU record in the
        target, a second chain in either file, a kept residue that has no
        counterpart in the reference, or a target residue with fewer atoms
        than its reference residue
    """

    assert refpdb and targetpdb and outpdb and resSeqMap

    def _output_residue(refResidues, targetAtomList, resSeqMap, outfh):
        """Output a single residue only outputting matching atoms, shuffling the atom order and changing the resSeq num"""

        # Get the matching list of atoms
        targetResSeq = targetAtomList[0].resSeq

        refResSeq = resSeqMap.ref2target(targetResSeq)

        # Get the atomlist for the reference
        refAtomList = None
        for (rid, alist) in refResidues:
            if rid == refResSeq:
                refAtomList = alist
                break

        if refAtomList is None:
            # Fail with an explicit message rather than an unbound local
            raise RuntimeError(
                "Cannot find reference residue {} matching target residue {}".
                format(refResSeq, targetResSeq))

        # Get ordered list of the ref atom names for this residue
        rnames = [x.name for x in refAtomList]

        if len(refAtomList) > len(targetAtomList):
            raise RuntimeError(
                "Cannot keep matching as refAtomList is > targetAtomList for residue {}\nRef: {}\nTrg: {}"
                .format(targetResSeq, rnames,
                        [x.name for x in targetAtomList]))

        # Work on a private list holding only the atoms named in the
        # reference, so the caller's list is never modified
        remaining = [atom for atom in targetAtomList if atom.name in rnames]

        # Now just have matching so output in the correct order
        for refname in rnames:
            for i, atom in enumerate(remaining):
                if atom.name == refname:
                    # Change resSeq and write out
                    atom.resSeq = refResSeq
                    outfh.write(atom.toLine() + "\n")

                    # Each target atom is written at most once
                    remaining.pop(i)
                    break
        return

    # Go through refpdb and find which refResidues are present
    refResidues = [
    ]  # ordered list of tuples - ( resSeq, [ list_of_atoms_for_that_residue ] )
    targetResSeq = []  # residue numbers to keep, as they appear in the target

    last = None
    chain = -1
    with open(refpdb, 'r') as ref:
        for line in ref:

            if line.startswith("MODEL"):
                raise RuntimeError("Multi-model file!")

            # Only the records before the first TER are used
            if line.startswith("TER"):
                break

            if line.startswith("ATOM"):
                a = pdb_model.PdbAtom(line)

                # First atom/chain
                if chain == -1:
                    chain = a.chainID

                if a.chainID != chain:
                    raise RuntimeError(
                        "ENCOUNTERED ANOTHER CHAIN! {0}".format(line))

                if a.resSeq != last:
                    last = a.resSeq

                    # Add the corresponding resSeq in the target
                    targetResSeq.append(resSeqMap.target2ref(a.resSeq))
                    refResidues.append((a.resSeq, [a]))
                else:
                    refResidues[-1][1].append(a)

    # Now read in target pdb and output everything bar the atoms in this file that
    # don't match those in the refpdb
    with open(targetpdb, 'r') as t, open(outpdb, 'w') as out:

        chain = None  # The chain we're reading
        residue = None  # the residue we're reading
        targetAtomList = []  # atoms buffered for the residue we're reading

        for line in t:

            if line.startswith("MODEL"):
                raise RuntimeError("Multi-model file!")

            if line.startswith("ANISOU"):
                raise RuntimeError(
                    "I cannot cope with ANISOU! {0}".format(line))

            if line.startswith("ATOM"):

                atom = pdb_model.PdbAtom(line)

                # First atom/chain
                if chain is None:
                    chain = atom.chainID

                if atom.chainID != chain:
                    raise RuntimeError(
                        "ENCOUNTERED ANOTHER CHAIN! {0}".format(line))

                if atom.resSeq in targetResSeq:
                    if atom.resSeq != residue:
                        # New residue: write out the one buffered so far
                        if targetAtomList:
                            _output_residue(refResidues, targetAtomList,
                                            resSeqMap, out)
                        targetAtomList = []
                        residue = atom.resSeq

                    targetAtomList.append(atom)

                # Atoms are only ever written by _output_residue; atoms of
                # residues that are not wanted are dropped
                continue

            # For time being exclude all HETATM lines
            if line.startswith("HETATM"):
                continue

            # Any other record: write the buffered residue first, so the last
            # residue is kept (and stays ahead of this record) even when the
            # file has no TER, then start afresh so it cannot be written twice
            if targetAtomList:
                _output_residue(refResidues, targetAtomList, resSeqMap, out)
                targetAtomList = []
                residue = None

            if line.startswith("TER"):
                # we write out our own TER
                out.write("TER\n")
            else:
                # Output everything else
                out.write(line)

        # Input ended straight after an ATOM record
        if targetAtomList:
            _output_residue(refResidues, targetAtomList, resSeqMap, out)

    return
예제 #9
0
    def read_pdb(self, pdb):
        """Get the sequence, residue numbers and C-alpha mask of a single-chain PDB.

        Only ATOM records before the first TER record are read. Consecutive
        atoms that share a resSeq are treated as one residue, and the residue
        name is taken from the first atom of each residue.

        Args:
            pdb: path of the PDB file to read

        Returns:
            A tuple (sequence, resSeq, cAlphaMask):
              sequence - string of one-letter residue codes
              resSeq - list of residue numbers, in file order
              cAlphaMask - list of booleans, True where the residue has no CA atom
            A file without ATOM records gives ("", [], []).

        Raises:
            RuntimeError: on a MODEL record or on atoms from a second chain
            KeyError: if a residue name is not in ample_util.three2one
        """

        resSeq = []
        resName = []
        atomTypesList = []  # per residue, the stripped atom names seen

        chain = None
        readingResSeq = None
        with open(pdb) as pdbfh:
            for line in pdbfh:

                if line.startswith("MODEL"):
                    raise RuntimeError("FOUND MULTI_MODEL FILE!")

                if line.startswith("TER"):
                    break

                if not line.startswith("ATOM"):
                    continue

                atom = pdb_model.PdbAtom(line)

                if not chain:
                    chain = atom.chainID

                if atom.chainID != chain:
                    raise RuntimeError("FOUND ADDITIONAL CHAIN")

                # Names are stored stripped, so the membership test below must
                # use the stripped name as well
                aname = atom.name.strip()

                if readingResSeq is None or atom.resSeq != readingResSeq:
                    # First atom of a new residue: open its entries straight
                    # away so nothing has to be flushed after the loop
                    readingResSeq = atom.resSeq
                    resSeq.append(atom.resSeq)
                    resName.append(atom.resName)
                    atomTypesList.append([aname])
                elif aname not in atomTypesList[-1]:
                    atomTypesList[-1].append(aname)

        sequence = "".join([ample_util.three2one[n] for n in resName])

        cAlphaMask = ['CA' not in names for names in atomTypesList]
        return sequence, resSeq, cAlphaMask